Add onigmo 5.13.5

This commit is contained in:
KingToolbox 2020-07-17 02:26:10 +08:00
parent cf11cf128d
commit 432b7676da
118 changed files with 116383 additions and 0 deletions

28
src/Onigmo/.editorconfig Normal file
View File

@ -0,0 +1,28 @@
; see: http://editorconfig.org/
root = true
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[**.[ch]]
indent_style = tab
indent_size = 2
tab_width = 8
[**.py]
indent_style = space
indent_size = 4
[**.rb]
indent_style = space
indent_size = 2
tab_width = 8
[win32/*]
end_of_line = crlf
[win32/*.py]
end_of_line = lf

66
src/Onigmo/.gitignore vendored Normal file
View File

@ -0,0 +1,66 @@
# ignore dot-files, binary files and backup files
.*
*.o
*.lo
*.so
*.a
*.la
*.obj
*.def
*.dll
*.exe
*.exp
*.lib
*.pyc
*.bak
*.BAK
*~
*.swp
*.orig
*.rej
*.RES
*.res
# exceptions: dot-files that must not be ignored
!.gitignore
!.editorconfig
# working dirs
.deps
.libs
# autotools generated files
/autom4te.cache
/config.h
/config.log
/config.status
/libtool
/onig-config
/oniguruma.pc
/Makefile
/sample/Makefile
/stamp-h1
# generated executable files
/enc/mktable
/sample/crnl
/sample/encode
/sample/listcap
/sample/names
/sample/posix
/sample/simple
/sample/sql
/sample/syntax
/testc
/testcu
/testp
# tag files
tags
TAGS
# GNU global files
GPATH
GRTAGS
GSYMS
GTAGS

2
src/Onigmo/AUTHORS Normal file
View File

@ -0,0 +1,2 @@
kentkt AT csc DOT jp (K.Takata)
sndgk393 AT ybb DOT ne DOT jp (K.Kosako)

87
src/Onigmo/COPYING Normal file
View File

@ -0,0 +1,87 @@
Onigmo (Oniguruma-mod) LICENSE
------------------------------
/*-
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
Oniguruma LICENSE
-----------------
/*-
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
Ruby BSDL
---------
Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

2369
src/Onigmo/HISTORY Normal file

File diff suppressed because it is too large Load Diff

365
src/Onigmo/INSTALL Normal file
View File

@ -0,0 +1,365 @@
Installation Instructions
*************************
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
2006, 2007, 2008, 2009 Free Software Foundation, Inc.
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. This file is offered as-is,
without warranty of any kind.
Basic Installation
==================
Briefly, the shell commands `./configure; make; make install' should
configure, build, and install this package. The following
more-detailed instructions are generic; see the `README' file for
instructions specific to this package. Some packages provide this
`INSTALL' file but do not implement all of the features documented
below. The lack of an optional feature in a given package is not
necessarily a bug. More recommendations for GNU packages can be found
in *note Makefile Conventions: (standards)Makefile Conventions.
The `configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
those values to create a `Makefile' in each directory of the package.
It may also create one or more `.h' files containing system-dependent
definitions. Finally, it creates a shell script `config.status' that
you can run in the future to recreate the current configuration, and a
file `config.log' containing compiler output (useful mainly for
debugging `configure').
It can also use an optional file (typically called `config.cache'
and enabled with `--cache-file=config.cache' or simply `-C') that saves
the results of its tests to speed up reconfiguring. Caching is
disabled by default to prevent problems with accidental use of stale
cache files.
If you need to do unusual things to compile the package, please try
to figure out how `configure' could check whether to do them, and mail
diffs or instructions to the address given in the `README' so they can
be considered for the next release. If you are using the cache, and at
some point `config.cache' contains results you don't want to keep, you
may remove or edit it.
The file `configure.ac' (or `configure.in') is used to create
`configure' by a program called `autoconf'. You need `configure.ac' if
you want to change it or regenerate `configure' using a newer version
of `autoconf'.
The simplest way to compile this package is:
1. `cd' to the directory containing the package's source code and type
`./configure' to configure the package for your system.
Running `configure' might take a while. While running, it prints
some messages telling which features it is checking for.
2. Type `make' to compile the package.
3. Optionally, type `make check' to run any self-tests that come with
the package, generally using the just-built uninstalled binaries.
4. Type `make install' to install the programs and any data files and
documentation. When installing into a prefix owned by root, it is
recommended that the package be configured and built as a regular
user, and only the `make install' phase executed with root
privileges.
5. Optionally, type `make installcheck' to repeat any self-tests, but
this time using the binaries in their final installed location.
This target does not install anything. Running this target as a
regular user, particularly if the prior `make install' required
root privileges, verifies that the installation completed
correctly.
6. You can remove the program binaries and object files from the
source code directory by typing `make clean'. To also remove the
files that `configure' created (so you can compile the package for
a different kind of computer), type `make distclean'. There is
also a `make maintainer-clean' target, but that is intended mainly
for the package's developers. If you use it, you may have to get
all sorts of other programs in order to regenerate files that came
with the distribution.
7. Often, you can also type `make uninstall' to remove the installed
files again. In practice, not all packages have tested that
uninstallation works correctly, even though it is required by the
GNU Coding Standards.
8. Some packages, particularly those that use Automake, provide `make
distcheck', which can be used by developers to test that all other
targets like `make install' and `make uninstall' work correctly.
This target is generally not run by end users.
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that
the `configure' script does not know about. Run `./configure --help'
for details on some of the pertinent environment variables.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
is an example:
./configure CC=c99 CFLAGS=-g LIBS=-lposix
*Note Defining Variables::, for more details.
Compiling For Multiple Architectures
====================================
You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you can use GNU `make'. `cd' to the
directory where you want the object files and executables to go and run
the `configure' script. `configure' automatically checks for the
source code in the directory that `configure' is in and in `..'. This
is known as a "VPATH" build.
With a non-GNU `make', it is safer to compile the package for one
architecture at a time in the source code directory. After you have
installed the package for one architecture, use `make distclean' before
reconfiguring for another architecture.
On MacOS X 10.5 and later systems, you can create libraries and
executables that work on multiple system types--known as "fat" or
"universal" binaries--by specifying multiple `-arch' options to the
compiler but only a single `-arch' option to the preprocessor. Like
this:
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CPP="gcc -E" CXXCPP="g++ -E"
This is not guaranteed to produce working output in all cases; you
may have to build one architecture at a time and combine the results
using the `lipo' tool if you have problems.
Installation Names
==================
By default, `make install' installs the package's commands under
`/usr/local/bin', include files under `/usr/local/include', etc. You
can specify an installation prefix other than `/usr/local' by giving
`configure' the option `--prefix=PREFIX', where PREFIX must be an
absolute file name.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
PREFIX as the prefix for installing programs and libraries.
Documentation and other data files still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like `--bindir=DIR' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them. In general, the
default for these options is expressed in terms of `${prefix}', so that
specifying just `--prefix' will affect all of the other directory
specifications that were not explicitly provided.
The most portable way to affect installation locations is to pass the
correct locations to `configure'; however, many packages provide one or
both of the following shortcuts of passing variable assignments to the
`make install' command line to change installation locations without
having to reconfigure or recompile.
The first method involves providing an override variable for each
affected directory. For example, `make install
prefix=/alternate/directory' will choose an alternate location for all
directory configuration variables that were expressed in terms of
`${prefix}'. Any directories that were specified during `configure',
but not in terms of `${prefix}', must each be overridden at install
time for the entire installation to be relocated. The approach of
makefile variable overrides for each directory variable is required by
the GNU Coding Standards, and ideally causes no recompilation.
However, some platforms have known limitations with the semantics of
shared libraries that end up requiring recompilation when using this
method, particularly noticeable in packages that use GNU Libtool.
The second method involves providing the `DESTDIR' variable. For
example, `make install DESTDIR=/alternate/directory' will prepend
`/alternate/directory' before all installation names. The approach of
`DESTDIR' overrides is not required by the GNU Coding Standards, and
does not work on platforms that have drive letters. On the other hand,
it does better at avoiding recompilation issues, and works well even
when some directory options were not specified in terms of `${prefix}'
at `configure' time.
Optional Features
=================
If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving `configure' the
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
Some packages pay attention to `--enable-FEATURE' options to
`configure', where FEATURE indicates an optional part of the package.
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
is something like `gnu-as' or `x' (for the X Window System). The
`README' should mention any `--enable-' and `--with-' options that the
package recognizes.
For packages that use the X Window System, `configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the `configure' options `--x-includes=DIR' and
`--x-libraries=DIR' to specify their locations.
Some packages offer the ability to configure how verbose the
execution of `make' will be. For these packages, running `./configure
--enable-silent-rules' sets the default to minimal output, which can be
overridden with `make V=1'; while running `./configure
--disable-silent-rules' sets the default to verbose, which can be
overridden with `make V=0'.
Particular systems
==================
On HP-UX, the default C compiler is not ANSI C compatible. If GNU
CC is not installed, it is recommended to use the following options in
order to use an ANSI C compiler:
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
a workaround. If GNU CC is not installed, it is therefore recommended
to try
./configure CC="cc"
and if that doesn't work, try
./configure CC="cc -nodtk"
On Solaris, don't put `/usr/ucb' early in your `PATH'. This
directory contains several dysfunctional programs; working variants of
these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
in your `PATH', put it _after_ `/usr/bin'.
On Haiku, software installed for all users goes in `/boot/common',
not `/usr/local'. It is recommended to use the following options:
./configure --prefix=/boot/common
Specifying the System Type
==========================
There may be some features `configure' cannot figure out
automatically, but needs to determine by the type of machine the package
will run on. Usually, assuming the package is built to be run on the
_same_ architectures, `configure' can figure that out, but if it prints
a message saying it cannot guess the machine type, give it the
`--build=TYPE' option. TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form:
CPU-COMPANY-SYSTEM
where SYSTEM can have one of these forms:
OS
KERNEL-OS
See the file `config.sub' for the possible values of each field. If
`config.sub' isn't included in this package, then this package doesn't
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
use the option `--target=TYPE' to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with `--host=TYPE'.
Sharing Defaults
================
If you want to set default values for `configure' scripts to share,
you can create a site shell script called `config.site' that gives
default values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists. Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script.
A warning: not all `configure' scripts look for a site script.
Defining Variables
==================
Variables not defined in a site shell script can be set in the
environment passed to `configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
them in the `configure' command line, using `VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
causes the specified `gcc' to be used as the C compiler (unless it is
overridden in the site shell script).
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
an Autoconf bug. Until the bug is fixed you can use this workaround:
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
`configure' Invocation
======================
`configure' recognizes the following options to control how it
operates.
`--help'
`-h'
Print a summary of all of the options to `configure', and exit.
`--help=short'
`--help=recursive'
Print a summary of the options unique to this package's
`configure', and exit. The `short' variant lists options used
only in the top level, while the `recursive' variant lists options
also present in any nested packages.
`--version'
`-V'
Print the version of Autoconf used to generate the `configure'
script, and exit.
`--cache-file=FILE'
Enable the cache: use and save the results of the tests in FILE,
traditionally `config.cache'. FILE defaults to `/dev/null' to
disable caching.
`--config-cache'
`-C'
Alias for `--cache-file=config.cache'.
`--quiet'
`--silent'
`-q'
Do not print messages saying which checks are being made. To
suppress all normal output, redirect it to `/dev/null' (any error
messages will still be shown).
`--srcdir=DIR'
Look for the package's source code in directory DIR. Usually
`configure' can determine that directory automatically.
`--prefix=DIR'
Use DIR as the installation prefix. *note Installation Names::
for more details, including other options available for fine-tuning
the installation locations.
`--no-create'
`-n'
Run the configure checks, but stop before creating any output
files.
`configure' also accepts some other, not widely useful, options. Run
`configure --help' for more details.

118
src/Onigmo/Makefile.am Normal file
View File

@ -0,0 +1,118 @@
## Makefile.am for Oniguruma
# Shorthand for the encoding and sample source directories referenced below.
encdir = $(top_srcdir)/enc
sampledir = $(top_srcdir)/sample
# Name of the libtool library produced by this Makefile.
libname = libonig.la
ACLOCAL_AMFLAGS = -I m4
#AM_CFLAGS = -DNOT_RUBY
AM_CFLAGS =
INCLUDES = -I$(top_srcdir) -I$(includedir)
# Process this directory first, then the sample/ subdirectory.
SUBDIRS = . sample
# Public headers installed with the library.
include_HEADERS = oniguruma.h oniggnu.h onigposix.h
lib_LTLIBRARIES = $(libname)
# Files that make up libonig: the internal headers, the regex core, the
# GNU (reggnu.c) and POSIX (regposix.c, regposerr.c) API layers, and the
# bundled character encodings under $(encdir).
libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \
regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \
regenc.c regsyntax.c regtrav.c regversion.c st.c \
regposix.c regposerr.c \
$(encdir)/unicode.c $(encdir)/ascii.c $(encdir)/utf8.c \
$(encdir)/utf16_be.c $(encdir)/utf16_le.c \
$(encdir)/utf32_be.c $(encdir)/utf32_le.c \
$(encdir)/unicode/casefold.h $(encdir)/unicode/name2ctype.h \
$(encdir)/euc_jp.c $(encdir)/sjis.c $(encdir)/cp932.c \
$(encdir)/iso8859_1.c \
$(encdir)/iso8859_2.c $(encdir)/iso8859_3.c \
$(encdir)/iso8859_4.c $(encdir)/iso8859_5.c \
$(encdir)/iso8859_6.c $(encdir)/iso8859_7.c \
$(encdir)/iso8859_8.c $(encdir)/iso8859_9.c \
$(encdir)/iso8859_10.c $(encdir)/iso8859_11.c \
$(encdir)/iso8859_13.c $(encdir)/iso8859_14.c \
$(encdir)/iso8859_15.c $(encdir)/iso8859_16.c \
$(encdir)/euc_tw.c $(encdir)/euc_kr.c $(encdir)/big5.c \
$(encdir)/gb18030.c $(encdir)/koi8_r.c $(encdir)/cp1251.c
# Library version handed to libtool. LTVERSION is not defined in this file
# (presumably substituted by configure -- confirm in configure.in).
libonig_la_LDFLAGS = -version-info $(LTVERSION)
# Extra files to ship in the distribution: documentation, generator tools,
# the Win32 build files, encoding/sample sources, and the test scripts.
EXTRA_DIST = .gitignore oniguruma.pc.in HISTORY README.ja index.html \
index_ja.html doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja \
doc/UnicodeProps.txt \
tool/.gitignore tool/CaseFolding.py tool/convert-name2ctype.sh \
tool/enc-unicode.rb \
win32/Makefile win32/config.h win32/testc.c \
win32/makedef.py win32/onig.rc \
$(encdir)/koi8.c $(encdir)/mktable.c \
$(sampledir)/encode.c $(sampledir)/listcap.c $(sampledir)/names.c \
$(sampledir)/posix.c $(sampledir)/simple.c $(sampledir)/sql.c \
$(sampledir)/syntax.c $(sampledir)/crnl.c \
test.rb testconv.rb testconvu.rb \
onig.py testpy.py
# onig-config is installed as a script. The rule below only records its
# dependency on the template; it has no recipe in this file.
bin_SCRIPTS = onig-config
onig-config: onig-config.in
# sed command that replaces @name@ placeholders with the configured
# directory and version values.
# NOTE(review): the [@] bracket form presumably keeps configure from
# substituting these patterns itself -- confirm.
do_subst = sed \
-e 's,[@]datadir[@],$(datadir),g' \
-e 's,[@]datarootdir[@],$(datarootdir),g' \
-e 's,[@]PACKAGE_VERSION[@],$(PACKAGE_VERSION),g' \
-e 's,[@]prefix[@],$(prefix),g' \
-e 's,[@]exec_prefix[@],$(exec_prefix),g' \
-e 's,[@]libdir[@],$(libdir),g' \
-e 's,[@]includedir[@],$(includedir),g'
# Generate the pkg-config file from its template. Makefile is listed as a
# prerequisite, so the file is rebuilt whenever the Makefile changes.
oniguruma.pc: $(srcdir)/oniguruma.pc.in Makefile
$(do_subst) < $(<) > $(@)
# Install oniguruma.pc into $(libdir)/pkgconfig.
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = oniguruma.pc
# Link every object file in the current directory into libonig.dll, writing
# the export definitions to libonig.def via the linker's --output-def option,
# then strip the resulting DLL.
dll:
$(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \
$(LIBS)
strip libonig.dll
# Ruby TEST
# Runs test.rb with the ruby binary found under $(RUBYDIR), which is not
# defined in this file, with warnings enabled (-w) and the -Ke option.
rtest:
$(RUBYDIR)/ruby -w -Ke $(srcdir)/test.rb
# character-types-table source generator
# Compiles enc/mktable.c directly with $(CC); regenc.h is a prerequisite, so
# the tool is rebuilt when that header changes.
mktable: $(encdir)/mktable.c $(srcdir)/regenc.h
$(CC) -I$(top_srcdir) -o mktable $(encdir)/mktable.c
# TEST
# Test programs built for, and run by, `make check'.
TESTS = testc testp testcu
check_PROGRAMS = testc testp testcu
# Manual test target: runs each test program and prints only the output
# lines that contain "RESULT".
atest: testc testp testcu
@echo "[Oniguruma API, ASCII/EUC-JP check]"
@$(top_builddir)/testc | grep RESULT
@echo "[POSIX API, ASCII/EUC-JP check]"
@$(top_builddir)/testp | grep RESULT
@echo "[Oniguruma API, UTF-16 check]"
@$(top_builddir)/testcu | grep RESULT
testc_SOURCES = testc.c
testc_LDADD = libonig.la
# testp is built from the same source file as testc, with POSIX_TEST defined.
testp_SOURCES = testc.c
testp_LDADD = libonig.la
testp_CFLAGS = -DPOSIX_TEST
testcu_SOURCES = testu.c
testcu_LDADD = libonig.la
# The C test sources are generated from test.rb by the converter scripts.
# testconv.rb reads test.rb on standard input.
testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb
ruby -Ke $(srcdir)/testconv.rb < $(srcdir)/test.rb > $@
# testconvu.rb takes test.rb as a command-line argument instead.
testu.c: $(srcdir)/test.rb $(srcdir)/testconvu.rb
ruby -Ke $(srcdir)/testconvu.rb $(srcdir)/test.rb > $@
# Win32 variant: passes -win to the converter and pipes the result through
# nkf -cs (presumably converting to Shift_JIS for the Win32 test -- confirm
# against the nkf documentation).
win32/testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb
ruby -Ke $(srcdir)/testconv.rb -win < $(srcdir)/test.rb | nkf -cs > $@
## END OF FILE

1437
src/Onigmo/Makefile.in Normal file

File diff suppressed because it is too large Load Diff

241
src/Onigmo/README Normal file
View File

@ -0,0 +1,241 @@
README 2013/03/21
Onigmo (Oniguruma-mod) -- (C) K.Takata <kentkt AT csc DOT jp>
https://github.com/k-takata/Onigmo
Onigmo is a regular expressions library forked from Oniguruma.
Some of the new features introduced in Perl 5.10+ can be used.
Some patches are merged from Ruby 2.0.0.
Main New features:
Regular Expressions (depends on the syntax):
\K, \R, \X, (?(cond)yes|no)
(?adlu), \g{name}, \g{n}, (?&name), (?n), (?R), (?0)
(?P<name>...), (?P=name), (?P>name)
API:
onig_search_gpos (for Perl-compatible \G)
Encoding:
CP932
Syntax:
Python
New Source Files:
enc/cp932.c CP932 encoding.
enc/unicode/casefold.h Unicode case folding data.
enc/unicode/name2ctype.h Unicode properties data.
onig.py onig.dll/libonig.so loader.
testpy.py test program.
tool/CaseFolding.py generates casefold.h.
tool/convert-name2ctype.sh converts name2ctype.kwd to name2ctype.h.
tool/enc-unicode.rb generates name2ctype.kwd.
win32/makedef.py creates onig.def.
win32/onig.rc resource file for onig.dll.
ToDo:
* Reduce the size of Unicode Character Data.
* (?|...)
* Improve (?(cond)yes|no). (support look-ahead/behind assertions.)
Oniguruma's README follows:
======================================================================
README 2007/05/31
Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
http://www.geocities.jp/kosako3/oniguruma/
Oniguruma is a regular expressions library.
The distinguishing feature of this library is that a different character
encoding can be specified for each regular expression object.
Supported character encodings:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
* GB18030: contributed by KUBO Takehiro
* CP1251: contributed by Byte
------------------------------------------------------------
License
BSD license.
Install
Case 1: Unix and Cygwin platform
1. ./configure
2. make
3. make install
* uninstall
make uninstall
* test (ASCII/EUC-JP)
make atest
* configuration check
onig-config --cflags
onig-config --libs
onig-config --prefix
onig-config --exec-prefix
Case 2: Win32 platform (VC++)
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
3. nmake
onig_s.lib: static link library
onig.dll: dynamic link library
* test (ASCII/Shift_JIS)
4. copy win32\testc.c testc.c
5. nmake ctest
Regular Expressions
See doc/RE (or doc/RE.ja for Japanese).
Usage
Include oniguruma.h in your program. (Oniguruma API)
See doc/API for Oniguruma API.
If you want to disable UChar type (== unsigned char) definition
in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
include oniguruma.h.
If you want to disable regex_t type definition in oniguruma.h,
define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h.
Example of the compiling/linking command line in Unix or Cygwin,
(prefix == /usr/local case)
cc sample.c -L/usr/local/lib -lonig
If you want to use the static link library (onig_s.lib) on Win32,
add the option -DONIG_EXTERN=extern to the C compiler command line.
Sample Programs
sample/simple.c example of the minimum (Oniguruma API)
sample/names.c example of the named group callback.
sample/encode.c example of some encodings.
sample/listcap.c example of the capture history.
sample/posix.c POSIX API sample.
sample/sql.c example of the variable meta characters.
(SQL-like pattern matching)
Test Programs
sample/syntax.c Perl, Java and ASIS syntax test.
sample/crnl.c --enable-crnl-as-line-terminator test
Source Files
oniguruma.h Oniguruma API header file. (public)
onig-config.in configuration check program template.
regenc.h character encodings framework header file.
regint.h internal definitions
regparse.h internal definitions for regparse.c and regcomp.c
regcomp.c compiling and optimization functions
regenc.c character encodings framework.
regerror.c error message function
regext.c extended API functions. (deluxe version API)
regexec.c search and match functions
regparse.c parsing functions.
regsyntax.c pattern syntax functions and built-in syntax definitions.
regtrav.c capture history tree data traverse functions.
regversion.c version info function.
st.h hash table functions header file
st.c hash table functions
oniggnu.h GNU regex API header file. (public)
reggnu.c GNU regex API functions
onigposix.h POSIX API header file. (public)
regposerr.c POSIX error message function.
regposix.c POSIX API functions.
enc/mktable.c character type table generator.
enc/ascii.c ASCII encoding.
enc/euc_jp.c EUC-JP encoding.
enc/euc_tw.c EUC-TW encoding.
enc/euc_kr.c EUC-KR, EUC-CN encoding.
enc/sjis.c Shift_JIS encoding.
enc/big5.c Big5 encoding.
enc/gb18030.c GB18030 encoding.
enc/koi8.c KOI8 encoding.
enc/koi8_r.c KOI8-R encoding.
enc/cp1251.c CP1251 encoding.
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4)
enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic)
enc/iso8859_6.c ISO-8859-6 encoding. (Arabic)
enc/iso8859_7.c ISO-8859-7 encoding. (Greek)
enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew)
enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish)
enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic)
enc/iso8859_11.c ISO-8859-11 encoding. (Thai)
enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim)
enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic)
enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
enc/iso8859_16.c ISO-8859-16 encoding.
(Latin-10 or South-Eastern European with Euro)
enc/utf8.c UTF-8 encoding.
enc/utf16_be.c UTF-16BE encoding.
enc/utf16_le.c UTF-16LE encoding.
enc/utf32_be.c UTF-32BE encoding.
enc/utf32_le.c UTF-32LE encoding.
enc/unicode.c Unicode information data.
win32/Makefile Makefile for Win32 (VC++)
win32/config.h config.h for Win32
ToDo
? case fold flag: Katakana <-> Hiragana.
? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
?? \X (== \PM\pM*)
?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
?? transmission stopper. (return ONIG_STOP from match_at())
and I'm thankful to Akinori MUSHA.
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>

247
src/Onigmo/README.ja Normal file
View File

@ -0,0 +1,247 @@
README.ja 2013/03/21
鬼雲 (鬼車改) -- (C) K.Takata <kentkt AT csc DOT jp>
https://github.com/k-takata/Onigmo
鬼雲は、鬼車から派生した正規表現ライブラリである。
Perl 5.10以降で新たに導入された正規表現の一部が使用可能になっている。
一部のパッチは Ruby 2.0.0 からマージしている。
主な新機能:
正規表現 (文法依存):
\K, \R, \X, (?(cond)yes|no)
(?adlu), \g{name}, \g{n}, (?&name), (?n), (?R), (?0)
(?P<name>...), (?P=name), (?P>name)
API:
onig_search_gpos (Perl互換の \G 用)
エンコーディング:
CP932
文法:
Python
新規ソースファイル:
enc/cp932.c CP932 エンコーディング
enc/unicode/casefold.h Unicodeケースフォールドデータ
enc/unicode/name2ctype.h Unicodeプロパティデータ
onig.py onig.dll/libonig.so ローダ
testpy.py テストプログラム
tool/CaseFolding.py casefold.hを生成
tool/convert-name2ctype.sh name2ctype.kwdをname2ctype.hに変換
tool/enc-unicode.rb name2ctype.kwdを生成
win32/makedef.py onig.defを作成
win32/onig.rc onig.dll用リソースファイル
ToDo:
* Unicode Character Data のサイズ削減。
* (?|...)
* (?(cond)yes|no) の改善。(先読み・戻り読みの対応)
以下、鬼車の README.ja:
======================================================================
README.ja 2007/05/31
鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
http://www.geocities.jp/kosako3/oniguruma/
鬼車は正規表現ライブラリである。
このライブラリの特長は、それぞれの正規表現オブジェクトごとに
文字エンコーディングを指定できることである。
サポートしている文字エンコーディング:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
* GB18030: 久保健洋氏提供
* CP1251: Byte氏提供
------------------------------------------------------------
ライセンス
BSDライセンスに従う。
インストール
ケース1: UnixとCygwin環境
1. ./configure
2. make
3. make install
アンインストール
make uninstall
動作テスト (ASCII/EUC-JP)
make atest
構成確認
onig-config --cflags
onig-config --libs
onig-config --prefix
onig-config --exec-prefix
ケース2: Win32(VC++)環境
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
3. nmake
onig_s.lib: static link library
onig.dll: dynamic link library
* 動作テスト (ASCII/Shift_JIS)
4. copy win32\testc.c testc.c
5. nmake ctest
正規表現
doc/RE.jaを参照
使用方法
使用するプログラムで、oniguruma.hをインクルードする(Oniguruma APIの場合)。
Oniguruma APIについては、doc/API.jaを参照。
oniguruma.hで定義されている型名UChar(== unsigned char)を無効にしたい場合
には、ONIG_ESCAPE_UCHAR_COLLISIONをdefineしてからoniguruma.hをインクルード
すること。このときにはUCharは定義されず、OnigUCharという名前の定義のみが
有効になる。
oniguruma.hで定義されている型名regex_tを無効にしたい場合には、
ONIG_ESCAPE_REGEX_T_COLLISIONをdefineしてからoniguruma.hをインクルード
すること。このときにはregex_tは定義されず、OnigRegexType, OnigRegexという
名前の定義のみが有効になる。
Unix/Cygwin上でコンパイル、リンクする場合の例
(prefixが/usr/localのとき)
cc sample.c -L/usr/local/lib -lonig
GNU libtoolを使用しているので、プラットフォームが共有ライブラリをサポートして
いれば、使用できるようになっている。
静的ライブラリと共有ライブラリのどちらを使用するかを指定する方法、実行時点での
環境設定方法については、自分で調べて下さい。
Win32でスタティックリンクライブラリ(onig_s.lib)をリンクする場合には、
コンパイルするときに -DONIG_EXTERN=extern をコンパイル引数に追加すること。
使用例プログラム
sample/simple.c 最小例 (Oniguruma API)
sample/names.c 名前付きグループコールバック使用例
sample/encode.c 幾つかの文字エンコーディング使用例
sample/listcap.c 捕獲履歴機能の使用例
sample/posix.c POSIX API使用例
sample/sql.c 可変メタ文字機能使用例 (SQL-like パターン)
テストプログラム
sample/syntax.c Perl、Java、ASIS文法のテスト
sample/crnl.c --enable-crnl-as-line-terminator テスト
ソースファイル
oniguruma.h 鬼車APIヘッダ (公開)
onig-config.in onig-configプログラム テンプレート
regenc.h 文字エンコーディング枠組みヘッダ
regint.h 内部宣言
regparse.h regparse.cとregcomp.cのための内部宣言
regcomp.c コンパイル、最適化関数
regenc.c 文字エンコーディング枠組み
regerror.c エラーメッセージ関数
regext.c 拡張API関数
regexec.c 検索、照合関数
regparse.c 正規表現パターン解析関数
regsyntax.c 正規表現パターン文法関数、組込み文法定義
regtrav.c 捕獲履歴木巡回関数
regversion.c 版情報関数
st.h ハッシュテーブル関数宣言
st.c ハッシュテーブル関数
oniggnu.h GNU regex APIヘッダ (公開)
reggnu.c GNU regex API関数
onigposix.h POSIX APIヘッダ (公開)
regposerr.c POSIX APIエラーメッセージ関数
regposix.c POSIX API関数
enc/mktable.c 文字タイプテーブル生成プログラム
enc/ascii.c ASCII エンコーディング
enc/euc_jp.c EUC-JP エンコーディング
enc/euc_tw.c EUC-TW エンコーディング
enc/euc_kr.c EUC-KR, EUC-CN エンコーディング
enc/sjis.c Shift_JIS エンコーディング
enc/big5.c Big5 エンコーディング
enc/gb18030.c GB18030 エンコーディング
enc/koi8.c KOI8 エンコーディング
enc/koi8_r.c KOI8-R エンコーディング
enc/cp1251.c CP1251 エンコーディング
enc/iso8859_1.c ISO-8859-1 (Latin-1)
enc/iso8859_2.c ISO-8859-2 (Latin-2)
enc/iso8859_3.c ISO-8859-3 (Latin-3)
enc/iso8859_4.c ISO-8859-4 (Latin-4)
enc/iso8859_5.c ISO-8859-5 (Cyrillic)
enc/iso8859_6.c ISO-8859-6 (Arabic)
enc/iso8859_7.c ISO-8859-7 (Greek)
enc/iso8859_8.c ISO-8859-8 (Hebrew)
enc/iso8859_9.c ISO-8859-9 (Latin-5 または Turkish)
enc/iso8859_10.c ISO-8859-10 (Latin-6 または Nordic)
enc/iso8859_11.c ISO-8859-11 (Thai)
enc/iso8859_13.c ISO-8859-13 (Latin-7 または Baltic Rim)
enc/iso8859_14.c ISO-8859-14 (Latin-8 または Celtic)
enc/iso8859_15.c ISO-8859-15 (Latin-9 または West European with Euro)
enc/iso8859_16.c ISO-8859-16
(Latin-10 または South-Eastern European with Euro)
enc/utf8.c UTF-8 エンコーディング
enc/utf16_be.c UTF-16BE エンコーディング
enc/utf16_le.c UTF-16LE エンコーディング
enc/utf32_be.c UTF-32BE エンコーディング
enc/utf32_le.c UTF-32LE エンコーディング
enc/unicode.c Unicode情報
win32/Makefile Win32用 Makefile (for VC++)
win32/config.h Win32用 config.h
残件
? case fold flag: Katakana <-> Hiragana
? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z)
?? \X (== \PM\pM*)
?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装
?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す)
and I'm thankful to Akinori MUSHA.
アドレス: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>

991
src/Onigmo/aclocal.m4 vendored Normal file
View File

@ -0,0 +1,991 @@
# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
# 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
# Autoconf version sanity check.
# Define AC_AUTOCONF_VERSION as a copy of m4_PACKAGE_VERSION when it is not
# defined yet, then emit an m4 warning (not an error) if its value is not
# 2.65, the version this file was generated for.
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.65],,
[m4_warning([this file was generated for autoconf 2.65.
You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically `autoreconf'.])])
# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_AUTOMAKE_VERSION(VERSION)
# ----------------------------
# Automake X.Y traces this macro to ensure aclocal.m4 has been
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
#
# Expansion: a shell assignment setting am__api_version to 1.11.
# If VERSION is anything other than 1.11.1 the macro aborts with a fatal
# error that points the caller at AM_INIT_AUTOMAKE instead.
AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.11'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.11.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
# _AM_AUTOCONF_VERSION(VERSION)
# -----------------------------
# aclocal traces this macro to find the Autoconf version.
# This is a private macro too. Using m4_define simplifies
# the logic in aclocal, which can simply ignore this definition.
#
# The definition body is empty, so a call expands to nothing; the macro
# exists only so that its argument shows up in traces.
m4_define([_AM_AUTOCONF_VERSION], [])
# AM_SET_CURRENT_AUTOMAKE_VERSION
# -------------------------------
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
#
# AM_AUTOMAKE_VERSION is invoked with 1.11.1.  AC_AUTOCONF_VERSION is
# defined from m4_PACKAGE_VERSION first if it is still undefined, and its
# value is then handed to _AM_AUTOCONF_VERSION.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.11.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
#
# Of course, Automake must honor this variable whenever it calls a
# tool from the auxiliary directory. The problem is that $srcdir (and
# therefore $ac_aux_dir as well) can be either absolute or relative,
# depending on how configure is run. This is pretty annoying, since
# it makes $ac_aux_dir quite unusable in subdirectories: in the top
# source directory, any form will work fine, but in subdirectories a
# relative path needs to be adjusted first.
#
# $ac_aux_dir/missing
# fails when called from a subdirectory if $ac_aux_dir is relative
# $top_srcdir/$ac_aux_dir/missing
# fails if $ac_aux_dir is absolute,
# fails when called from a subdirectory in a VPATH build with
# a relative $ac_aux_dir
#
# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
# are both prefixed by $srcdir. In an in-source build this is usually
# harmless because $srcdir is `.', but things will break when you
# start a VPATH build or use an absolute $srcdir.
#
# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
# and then we would define $MISSING as
# MISSING="\${SHELL} $am_aux_dir/missing"
# This will work as long as MISSING is not called from configure, because
# unfortunately $(top_srcdir) has no meaning in configure.
# However there are other variables, like CC, which are often used in
# configure, and could therefore not use this "fixed" $ac_aux_dir.
#
# Another solution, used here, is to always expand $ac_aux_dir to an
# absolute PATH. The drawback is that using absolute paths prevents a
# configured tree from being moved without reconfiguration.
# AM_AUX_DIR_EXPAND
# -----------------
# Set the shell variable am_aux_dir to the absolute form of $ac_aux_dir,
# obtained by changing into that directory in a subshell and printing the
# working directory.  Requires Autoconf 2.50 or later.
AC_DEFUN([AM_AUX_DIR_EXPAND],
[dnl Rely on autoconf to set up CDPATH properly.
AC_PREREQ([2.50])dnl
# Expand $ac_aux_dir to an absolute path.  The directory name is quoted so
# that a path containing whitespace is not split into several words.
am_aux_dir=`cd "$ac_aux_dir" && pwd`
])
# AM_CONDITIONAL -*- Autoconf -*-
# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006, 2008
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 9
# AM_CONDITIONAL(NAME, SHELL-CONDITION)
# -------------------------------------
# Define a conditional.
#
# NAME            identifier of the conditional; the literal names TRUE and
#                 FALSE are rejected with a fatal error.
# SHELL-CONDITION shell command evaluated by configure in an `if'.
#
# Two output variables, NAME_TRUE and NAME_FALSE, are substituted.  When
# SHELL-CONDITION succeeds NAME_TRUE is empty and NAME_FALSE is `#';
# otherwise the values are swapped.  The condition text is also recorded in
# the m4 macro _AM_COND_VALUE_NAME.  A check registered to run before
# config.status is created aborts configure when both variables are empty,
# i.e. when the assignments above were never executed.
AC_DEFUN([AM_CONDITIONAL],
[AC_PREREQ(2.52)dnl
ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
[$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
AC_SUBST([$1_TRUE])dnl
AC_SUBST([$1_FALSE])dnl
_AM_SUBST_NOTMAKE([$1_TRUE])dnl
_AM_SUBST_NOTMAKE([$1_FALSE])dnl
m4_define([_AM_COND_VALUE_$1], [$2])dnl
if $2; then
$1_TRUE=
$1_FALSE='#'
else
$1_TRUE='#'
$1_FALSE=
fi
AC_CONFIG_COMMANDS_PRE(
[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
AC_MSG_ERROR([[conditional "$1" was never defined.
Usually this means the macro was only invoked conditionally.]])
fi])])
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 10
# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
# written in clear, in which case automake, when reading aclocal.m4,
# will think it sees a *use*, and therefore will trigger all its
# C support machinery. Also note that it means that autoscan, seeing
# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
# _AM_DEPENDENCIES(NAME)
# ----------------------
# See how the compiler implements dependency checking.
# NAME is "CC", "CXX", "GCJ", or "OBJC".
# We try a few techniques and use that to set a single cache variable.
#
# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
# dependency, and given that the user is not expected to run this macro,
# just rely on AC_PROG_CC.
#
# Outline of the expansion:
#  * depcc is set to the compiler named by NAME, and am_compiler_list to a
#    preset list of depmodes (empty for most compilers, in which case the
#    list is read out of the depcomp script itself).
#  * The result is cached in am_cv_NAME_dependencies_compiler_type.  It is
#    `none' when AMDEP_TRUE is non-empty or the depcomp script is missing.
#  * Otherwise every depmode is tried in turn inside a scratch directory
#    conftest.dir: a source including six headers is compiled through
#    depcomp, and the generated sub/conftest.Po must mention the first and
#    last header and the object file, and must be usable by make.  A mode
#    is rejected when the compiler printed `ignoring option' or
#    `not supported' on stderr.  The first mode that passes wins.
#  * The `gcc' mode is skipped when the compiler command contains two
#    -arch options (CC and CXX only).
#  * NAMEDEPMODE is substituted as depmode=RESULT, and the conditional
#    am__fastdepNAME is true when dependency tracking was not disabled and
#    the result is gcc3.
AC_DEFUN([_AM_DEPENDENCIES],
[AC_REQUIRE([AM_SET_DEPDIR])dnl
AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
AC_REQUIRE([AM_MAKE_INCLUDE])dnl
AC_REQUIRE([AM_DEP_TRACK])dnl
ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
[$1], CXX, [depcc="$CXX" am_compiler_list=],
[$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
[$1], UPC, [depcc="$UPC" am_compiler_list=],
[$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
[depcc="$$1" am_compiler_list=])
AC_CACHE_CHECK([dependency style of $depcc],
[am_cv_$1_dependencies_compiler_type],
[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
# We make a subdir and do the tests there. Otherwise we can end up
# making bogus files that we don't know about and never remove. For
# instance it was reported that on HP-UX the gcc test will end up
# making a dummy file named `D' -- because `-MD' means `put the output
# in D'.
mkdir conftest.dir
# Copy depcomp to subdir because otherwise we won't find it if we're
# using a relative directory.
cp "$am_depcomp" conftest.dir
cd conftest.dir
# We will build objects and dependencies in a subdirectory because
# it helps to detect inapplicable dependency modes. For instance
# both Tru64's cc and ICC support -MD to output dependencies as a
# side effect of compilation, but ICC will put the dependencies in
# the current directory while Tru64 will put them in the object
# directory.
mkdir sub
am_cv_$1_dependencies_compiler_type=none
if test "$am_compiler_list" = ""; then
am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
fi
am__universal=false
m4_case([$1], [CC],
[case " $depcc " in #(
*\ -arch\ *\ -arch\ *) am__universal=true ;;
esac],
[CXX],
[case " $depcc " in #(
*\ -arch\ *\ -arch\ *) am__universal=true ;;
esac])
for depmode in $am_compiler_list; do
# Setup a source with many dependencies, because some compilers
# like to wrap large dependency lists on column 80 (with \), and
# we should not choose a depcomp mode which is confused by this.
#
# We need to recreate these files for each test, as the compiler may
# overwrite some of them when testing with obscure command lines.
# This happens at least with the AIX C compiler.
: > sub/conftest.c
for i in 1 2 3 4 5 6; do
echo '#include "conftst'$i'.h"' >> sub/conftest.c
# Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
# Solaris 8's {/usr,}/bin/sh.
touch sub/conftst$i.h
done
echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
# We check with `-c' and `-o' for the sake of the "dashmstdout"
# mode. It turns out that the SunPro C++ compiler does not properly
# handle `-M -o', and we need to detect this. Also, some Intel
# versions had trouble with output in subdirs
am__obj=sub/conftest.${OBJEXT-o}
am__minus_obj="-o $am__obj"
case $depmode in
gcc)
# This depmode causes a compiler race in universal mode.
test "$am__universal" = false || continue
;;
nosideeffect)
# after this tag, mechanisms are not by side-effect, so they'll
# only be used when explicitly requested
if test "x$enable_dependency_tracking" = xyes; then
continue
else
break
fi
;;
msvisualcpp | msvcmsys)
# This compiler won't grok `-c -o', but also, the minuso test has
# not run yet. These depmodes are late enough in the game, and
# so weak that their functioning should not be impacted.
am__obj=conftest.${OBJEXT-o}
am__minus_obj=
;;
none) break ;;
esac
if depmode=$depmode \
source=sub/conftest.c object=$am__obj \
depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
$SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
>/dev/null 2>conftest.err &&
grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
${MAKE-make} -s -f confmf > /dev/null 2>&1; then
# icc doesn't choke on unknown options, it will just issue warnings
# or remarks (even with -Werror). So we grep stderr for any message
# that says an option was ignored or not supported.
# When given -MP, icc 7.0 and 7.1 complain thusly:
# icc: Command line warning: ignoring option '-M'; no argument required
# The diagnosis changed in icc 8.0:
# icc: Command line remark: option '-MP' not supported
if (grep 'ignoring option' conftest.err ||
grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
am_cv_$1_dependencies_compiler_type=$depmode
break
fi
fi
done
cd ..
rm -rf conftest.dir
else
am_cv_$1_dependencies_compiler_type=none
fi
])
AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
AM_CONDITIONAL([am__fastdep$1], [
test "x$enable_dependency_tracking" != xno \
&& test "$am_cv_$1_dependencies_compiler_type" = gcc3])
])
# AM_SET_DEPDIR
# -------------
# Choose a directory name for dependency files.
# This macro is AC_REQUIREd in _AM_DEPENDENCIES
#
# DEPDIR is substituted as the value of am__leading_dot followed by
# `deps'; AM_SET_LEADING_DOT is required first so that variable is set.
AC_DEFUN([AM_SET_DEPDIR],
[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
])
# AM_DEP_TRACK
# ------------
# Declare the --enable-dependency-tracking / --disable-dependency-tracking
# configure option.  Unless the option value is `no', am_depcomp is set to
# the depcomp script in $ac_aux_dir and AMDEPBACKSLASH to a backslash.
# The AMDEP conditional is true under the same test, and AMDEPBACKSLASH is
# substituted.
AC_DEFUN([AM_DEP_TRACK],
[AC_ARG_ENABLE(dependency-tracking,
[ --disable-dependency-tracking speeds up one-time build
 --enable-dependency-tracking do not reject slow dependency extractors])
if test "x$enable_dependency_tracking" != xno; then
am_depcomp="$ac_aux_dir/depcomp"
AMDEPBACKSLASH='\'
fi
AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
AC_SUBST([AMDEPBACKSLASH])dnl
_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
])
# Generate code to set up dependency tracking. -*- Autoconf -*-
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
#serial 5
# _AM_OUTPUT_DEPENDENCY_COMMANDS
# ------------------------------
# Shell fragment that creates a placeholder for every dependency file an
# Automake-generated Makefile includes.  For each entry of $CONFIG_FILES:
#  * the part after the first colon is dropped to get the file name;
#  * files that do not carry a "generated by automake" comment are skipped;
#  * DEPDIR, am__include, am__quote and U are read out of the file, and the
#    file is skipped when DEPDIR or am__include is not defined in it;
#  * every included file whose name mentions $(DEPDIR) and which does not
#    exist yet is created, together with its directory, containing the
#    single line `# dummy'.
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
[{
  # Autoconf 2.62 quotes --file arguments for eval, but not when files
  # are listed without --file. Let's play safe and only enable the eval
  # if we detect the quoting.
  case $CONFIG_FILES in
  *\'*) eval set x "$CONFIG_FILES" ;;
  *) set x $CONFIG_FILES ;;
  esac
  shift
  for mf
  do
    # Strip MF so we end up with the name of the file.
    mf=`echo "$mf" | sed -e 's/:.*$//'`
    # Check whether this is an Automake generated Makefile or not.
    # We used to match only the files named `Makefile.in', but
    # some people rename them; so instead we look at the file content.
    # Grep'ing the first line is not enough: some people post-process
    # each Makefile.in and add a new line on top of each file to say so.
    # Grep'ing the whole file is not good either: AIX grep has a line
    # limit of 2048, but all sed's we know understand at least 4000.
    if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
      dirpart=`AS_DIRNAME("$mf")`
    else
      continue
    fi
    # Extract the definition of DEPDIR, am__include, and am__quote
    # from the Makefile without running `make'.
    DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
    test -z "$DEPDIR" && continue
    am__include=`sed -n 's/^am__include = //p' < "$mf"`
    # Test the variable, not a literal string, so that a Makefile without
    # an include directive is really skipped.
    test -z "$am__include" && continue
    am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
    # When using ansi2knr, U may be empty or an underscore; expand it
    U=`sed -n 's/^U = //p' < "$mf"`
    # Find all dependency output files, they are included files with
    # $(DEPDIR) in their names. We invoke sed twice because it is the
    # simplest approach to changing $(DEPDIR) to its actual value in the
    # expansion.
    for file in `sed -n "
      s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
      sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
      # Make sure the directory exists.
      test -f "$dirpart/$file" && continue
      fdir=`AS_DIRNAME(["$file"])`
      AS_MKDIR_P([$dirpart/$fdir])
      # echo "creating $dirpart/$file"
      echo '# dummy' > "$dirpart/$file"
    done
  done
}
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
# AM_OUTPUT_DEPENDENCY_COMMANDS
# -----------------------------
# This macro should only be invoked once -- use via AC_REQUIRE.
#
# This code is only required when automatic dependency tracking
# is enabled. FIXME. This creates each `.P' file that we will
# need in order to bootstrap the dependency handling code.
#
# Registers a config.status command named `depfiles'.  The command runs
# _AM_OUTPUT_DEPENDENCY_COMMANDS only when AMDEP_TRUE is empty; a non-empty
# AMDEP_TRUE makes the leading test succeed and the rest is skipped.
# AMDEP_TRUE and ac_aux_dir are passed into config.status as init commands.
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
[AC_CONFIG_COMMANDS([depfiles],
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
[AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
])
# Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
# 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 16
# This macro actually does too much. Some checks are only needed if
# your package does certain things. But this isn't really a big deal.
# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
# AM_INIT_AUTOMAKE([OPTIONS])
# -----------------------------------------------
# The call with PACKAGE and VERSION arguments is the old style
# call (pre autoconf-2.50), which is being phased out. PACKAGE
# and VERSION should now be passed to AC_INIT and removed from
# the call to AM_INIT_AUTOMAKE.
# We support both call styles for the transition. After
# the next Automake release, Autoconf can make the AC_INIT
# arguments mandatory, and then we can depend on a new Autoconf
# release and drop the old call support.
#
# Steps performed by the expansion, in order:
#  * require Autoconf 2.62 and allow the AM_*FLAGS name pattern;
#  * when the build directory differs from $srcdir, substitute am__isrc
#    as -I$(srcdir) and abort if $srcdir already holds a config.status;
#  * set CYGPATH_W to `cygpath -w' when cygpath runs, else to echo, unless
#    it is already set;
#  * substitute PACKAGE and VERSION, from the arguments in an old-style
#    call or from the AC_INIT values otherwise (a new-style call without
#    AC_INIT package and version is a fatal m4 error);
#  * define PACKAGE and VERSION in the config header unless the no-define
#    option is set;
#  * locate aclocal, autoconf, automake, autoheader and makeinfo through
#    AM_MISSING_PROG, and require the install, mkdir -p, awk and make
#    checks;
#  * select the tar format from the tar-ustar / tar-pax options, defaulting
#    to v7;
#  * unless no-dependencies is set, run _AM_DEPENDENCIES for the C, C++
#    and Objective C compilers, either immediately when the compiler check
#    was already provided or by appending to that check's definition;
#  * honour the silent-rules option and register the am__EXEEXT
#    conditional when _AM_COMPILER_EXEEXT was provided.
AC_DEFUN([AM_INIT_AUTOMAKE],
[AC_PREREQ([2.62])dnl
dnl Autoconf wants to disallow AM_ names. We explicitly allow
dnl the ones we care about.
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
AC_REQUIRE([AC_PROG_INSTALL])dnl
if test "`cd $srcdir && pwd`" != "`pwd`"; then
# Use -I$(srcdir) only when $(srcdir) != ., so that make's output
# is not polluted with repeated "-I."
AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
# test to see if srcdir already configured
if test -f $srcdir/config.status; then
AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
fi
fi
# test whether we have cygpath
if test -z "$CYGPATH_W"; then
if (cygpath --version) >/dev/null 2>/dev/null; then
CYGPATH_W='cygpath -w'
else
CYGPATH_W=echo
fi
fi
AC_SUBST([CYGPATH_W])
# Define the identity of the package.
dnl Distinguish between old-style and new-style calls.
m4_ifval([$2],
[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
AC_SUBST([PACKAGE], [$1])dnl
AC_SUBST([VERSION], [$2])],
[_AM_SET_OPTIONS([$1])dnl
dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT.
m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
[m4_fatal([AC_INIT should be called with package and version arguments])])dnl
AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
_AM_IF_OPTION([no-define],,
[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
# Some tools Automake needs.
AC_REQUIRE([AM_SANITY_CHECK])dnl
AC_REQUIRE([AC_ARG_PROGRAM])dnl
AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
AM_MISSING_PROG(AUTOCONF, autoconf)
AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
AM_MISSING_PROG(AUTOHEADER, autoheader)
AM_MISSING_PROG(MAKEINFO, makeinfo)
AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
AC_REQUIRE([AM_PROG_MKDIR_P])dnl
# We need awk for the "check" target. The system "awk" is bad on
# some platforms.
AC_REQUIRE([AC_PROG_AWK])dnl
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
[_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
[_AM_PROG_TAR([v7])])])
_AM_IF_OPTION([no-dependencies],,
[AC_PROVIDE_IFELSE([AC_PROG_CC],
[_AM_DEPENDENCIES(CC)],
[define([AC_PROG_CC],
defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
AC_PROVIDE_IFELSE([AC_PROG_CXX],
[_AM_DEPENDENCIES(CXX)],
[define([AC_PROG_CXX],
defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
AC_PROVIDE_IFELSE([AC_PROG_OBJC],
[_AM_DEPENDENCIES(OBJC)],
[define([AC_PROG_OBJC],
defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
])
_AM_IF_OPTION([silent-rules], [AC_REQUIRE([AM_SILENT_RULES])])dnl
dnl The `parallel-tests' driver may need to know about EXEEXT, so add the
dnl `am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This macro
dnl is hooked onto _AC_COMPILER_EXEEXT early, see below.
AC_CONFIG_COMMANDS_PRE(dnl
[m4_provide_if([_AM_COMPILER_EXEEXT],
[AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
])
dnl Hook into `_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
dnl mangled by Autoconf and run in a shell conditional statement.
dnl The redefinition keeps the existing definition text and appends a call
dnl that marks _AM_COMPILER_EXEEXT as provided whenever the macro expands.
m4_define([_AC_COMPILER_EXEEXT],
m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
# When config.status generates a header, we must update the stamp-h file.
# This file resides in the same directory as the config header
# that is generated. The stamp files are numbered to have different names.
# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
# loop where config.status creates the headers, so we can generate
# our stamp files there.
#
# _AC_AM_CONFIG_HEADER_HOOK(HEADER)
# The counter starts at 1 and is incremented for every entry of
# $config_headers that precedes HEADER; an entry matches when it equals
# HEADER or starts with HEADER followed by a colon.  The line
# "timestamp for HEADER" is then written to stamp-hN in the directory of
# HEADER, N being the counter.
# NOTE(review): the literal `:' appended to the loop list presumably keeps
# the word list non-empty when $config_headers is empty -- confirm.
AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
[# Compute $1's index in $config_headers.
_am_arg=$1
_am_stamp_count=1
for _am_header in $config_headers :; do
case $_am_header in
$_am_arg | $_am_arg:* )
break ;;
* )
_am_stamp_count=`expr $_am_stamp_count + 1` ;;
esac
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
# Copyright (C) 2001, 2003, 2005, 2008 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_PROG_INSTALL_SH
# ------------------
# Define $install_sh.
#
# Unless install_sh already holds the literal value `set', point it at the
# install-sh script in the absolute auxiliary directory, run through
# ${SHELL}.  The script path is wrapped in single quotes when the directory
# name contains a space or a tab, so that the command is not word-split.
# The second alternative of the first case pattern holds a literal TAB
# character; it must not be turned into a space.
AC_DEFUN([AM_PROG_INSTALL_SH],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
if test x"${install_sh}" != xset; then
  case $am_aux_dir in
  *\ * | *\	*)
    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
  *)
    install_sh="\${SHELL} $am_aux_dir/install-sh"
  esac
fi
AC_SUBST(install_sh)])
# Copyright (C) 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 2
# AM_SET_LEADING_DOT
# ------------------
# Find out whether file names may start with a dot on this file system
# (MS-DOS, for one, does not allow it).  A scratch directory named .tst is
# created and removed again; am__leading_dot is substituted as `.' when the
# directory could be created and as `_' otherwise.
AC_DEFUN([AM_SET_LEADING_DOT],
[rm -rf .tst 2>/dev/null
mkdir .tst 2>/dev/null
# Start from the underscore fallback and switch to a dot only when the
# probe directory really exists.
am__leading_dot=_
test -d .tst && am__leading_dot=.
rmdir .tst 2>/dev/null
AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001, 2002, 2003, 2005, 2009 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 4
# AM_MAKE_INCLUDE()
# -----------------
# Check to see how make treats includes.
#
# A one-target makefile fragment `confinc' is written, then included from a
# second file `confmf' first with the GNU syntax (include FILE) and, if
# that did not run the target, with the BSD syntax (.include "FILE").
# Results, all substituted:
#   am__include  `include', `.include', or `#' when neither form works
#   am__quote    empty for GNU make, a double quote for BSD make
# The recipe line inside the here-document must start with a TAB character,
# otherwise make rejects the fragment and both probes fail.
AC_DEFUN([AM_MAKE_INCLUDE],
[am_make=${MAKE-make}
cat > confinc << 'END'
am__doit:
	@echo this is the am__doit target
.PHONY: am__doit
END
# If we don't find an include directive, just comment out the code.
AC_MSG_CHECKING([for style of include used by $am_make])
am__include="#"
am__quote=
_am_result=none
# First try GNU make style include.
echo "include confinc" > confmf
# Ignore all kinds of additional output from `make'.
case `$am_make -s -f confmf 2> /dev/null` in #(
*the\ am__doit\ target*)
  am__include=include
  am__quote=
  _am_result=GNU
  ;;
esac
# Now try BSD make style include.
if test "$am__include" = "#"; then
  echo '.include "confinc"' > confmf
  case `$am_make -s -f confmf 2> /dev/null` in #(
  *the\ am__doit\ target*)
    am__include=.include
    am__quote="\""
    _am_result=BSD
    ;;
  esac
fi
AC_SUBST([am__include])
AC_SUBST([am__quote])
AC_MSG_RESULT([$_am_result])
rm -f confinc confmf
])
# Copyright (C) 1999, 2000, 2001, 2003, 2004, 2005, 2008
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 6
# AM_PROG_CC_C_O
# --------------
# Like AC_PROG_CC_C_O, but changed for automake.
# The first word of $CC is mangled (every character outside a-zA-Z0-9_
# and a leading digit become "_") to build the cache variable name
# ac_cv_prog_cc_NAME_c_o.  When that variable is not "yes", CC is prefixed
# with the `compile' script from $am_aux_dir.  AC_PROG_CC is then
# redefined to stop with a fatal error if it is expanded again.
AC_DEFUN([AM_PROG_CC_C_O],
[AC_REQUIRE([AC_PROG_CC_C_O])dnl
AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
AC_REQUIRE_AUX_FILE([compile])dnl
# FIXME: we rely on the cache variable name because
# there is no other way.
set dummy $CC
am_cc=`echo $[2] | sed ['s/[^a-zA-Z0-9_]/_/g;s/^[0-9]/_/']`
eval am_t=\$ac_cv_prog_cc_${am_cc}_c_o
if test "$am_t" != yes; then
# Losing compiler, so override with the script.
# FIXME: It is wrong to rewrite CC.
# But if we don't then we get into trouble of one sort or another.
# A longer-term fix would be to have automake use am__CC in this case,
# and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
CC="$am_aux_dir/compile $CC"
fi
dnl Make sure AC_PROG_CC is never called again, or it will override our
dnl setting of CC.
m4_define([AC_PROG_CC],
[m4_fatal([AC_PROG_CC cannot be called after AM_PROG_CC_C_O])])
])
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005, 2008
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 6
# AM_MISSING_PROG(NAME, PROGRAM)
# ------------------------------
# Give the shell variable NAME the value "${am_missing_run}PROGRAM" unless
# NAME is already set, then substitute NAME.  AM_MISSING_HAS_RUN is
# required first; it is the macro that assigns am_missing_run.
AC_DEFUN([AM_MISSING_PROG],
[AC_REQUIRE([AM_MISSING_HAS_RUN])
$1=${$1-"${am_missing_run}$2"}
AC_SUBST($1)])
# AM_MISSING_HAS_RUN
# ------------------
# Define MISSING if not defined so far and test if it supports --run.
# If it does, set am_missing_run to use it, otherwise, to nothing.
# The default MISSING is "${SHELL} AUXDIR/missing"; the path is wrapped in
# double quotes when the auxiliary directory name contains a space or a
# tab.
AC_DEFUN([AM_MISSING_HAS_RUN],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
AC_REQUIRE_AUX_FILE([missing])dnl
if test x"${MISSING+set}" != xset; then
  # The two alternatives match a space and a tab respectively.
  case $am_aux_dir in
  *\ * | *\	*)
    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
  *)
    MISSING="\${SHELL} $am_aux_dir/missing" ;;
  esac
fi
# Use eval to expand $SHELL
if eval "$MISSING --run true"; then
  am_missing_run="$MISSING --run "
else
  am_missing_run=
  AC_MSG_WARN([`missing' script is too old or missing])
fi
])
# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_PROG_MKDIR_P
# ---------------
# Check for `mkdir -p'.
# Substitutes mkdir_p, initialised from $MKDIR_P.  A value that starts
# with a slash, a backslash, a dollar sign or a drive prefix such as "c:/"
# is left alone; any other value containing a slash is prefixed with
# $(top_builddir)/.
AC_DEFUN([AM_PROG_MKDIR_P],
[AC_PREREQ([2.60])dnl
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
dnl while keeping a definition of mkdir_p for backward compatibility.
dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
dnl Makefile.ins that do not define MKDIR_P, so we do our own
dnl adjustment using top_builddir (which is defined more often than
dnl MKDIR_P).
AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
case $mkdir_p in
[[\\/$]]* | ?:[[\\/]]*) ;;
*/*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
esac
])
# Helper functions for option handling. -*- Autoconf -*-
# Copyright (C) 2001, 2002, 2003, 2005, 2008 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 4
# _AM_MANGLE_OPTION(NAME)
# -----------------------
# Expand to the flag macro name used for option NAME: the literal prefix
# _AM_OPTION_ followed by NAME with every character outside a-zA-Z0-9_
# replaced by an underscore.
AC_DEFUN([_AM_MANGLE_OPTION],
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
# _AM_SET_OPTION(NAME)
# ------------------------------
# Set option NAME. Presently that only means defining a flag for this option.
# The flag is the macro named by _AM_MANGLE_OPTION(NAME), defined to 1.
AC_DEFUN([_AM_SET_OPTION],
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
# _AM_SET_OPTIONS(OPTIONS)
# ----------------------------------
# OPTIONS is a space-separated list of Automake options.
# _AM_SET_OPTION is expanded once for every word of the list.
AC_DEFUN([_AM_SET_OPTIONS],
[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
# -------------------------------------------
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
# "Set" is decided by m4_ifset on the flag macro named by
# _AM_MANGLE_OPTION(OPTION).
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
# Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005, 2008
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 5
# AM_SANITY_CHECK
# ---------------
# Abort configure when the working directory or $srcdir contains unsafe
# characters, when `ls -t' gives unusable output, or when a file created
# just now sorts as older than $srcdir/configure.
AC_DEFUN([AM_SANITY_CHECK],
[AC_MSG_CHECKING([whether build environment is sane])
# Just in case
sleep 1
echo timestamp > conftest.file
# Reject unsafe characters in $srcdir or the absolute working directory
# name.  Accept space and tab only in the latter.
am_lf='
'
case `pwd` in
  *[[\\\"\#\$\&\'\`$am_lf]]*)
    AC_MSG_ERROR([unsafe absolute working directory name]);;
esac
# The srcdir pattern additionally lists an escaped space and an escaped tab.
case $srcdir in
  *[[\\\"\#\$\&\'\`$am_lf\ \	]]*)
    AC_MSG_ERROR([unsafe srcdir value: `$srcdir']);;
esac
# Do `set' in a subshell so we don't clobber the current shell's
# arguments.  Must try -L first in case configure is actually a
# symlink; some systems play weird games with the mod time of symlinks
# (eg FreeBSD returns the mod time of the symlink's containing
# directory).
if (
   set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
   if test "$[*]" = "X"; then
      # -L didn't work.
      set X `ls -t "$srcdir/configure" conftest.file`
   fi
   rm -f conftest.file
   if test "$[*]" != "X $srcdir/configure conftest.file" \
      && test "$[*]" != "X conftest.file $srcdir/configure"; then
      # If neither matched, then we have a broken ls.  This can happen
      # if, for instance, CONFIG_SHELL is bash and it inherits a
      # broken ls alias from the environment.  This has actually
      # happened.  Such a system could not be considered "sane".
      AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
alias in your environment])
   fi
   # ls -t lists the newest file first, so the fresh file must come first.
   test "$[2]" = conftest.file
   )
then
   # Ok.
   :
else
   AC_MSG_ERROR([newly created file is older than distributed files!
Check your system clock])
fi
AC_MSG_RESULT(yes)])
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_PROG_INSTALL_STRIP
# ---------------------
# One issue with vendor `install' (even GNU) is that you can't
# specify the program used to strip binaries. This is especially
# annoying in cross-compiling environments, where the build's strip
# is unlikely to handle the host's binaries.
# Fortunately install-sh will honor a STRIPPROG variable, so we
# always use install-sh in `make install-strip', and initialize
# STRIPPROG with the value of the STRIP variable (set by the user).
# This macro substitutes INSTALL_STRIP_PROGRAM as "$(install_sh) -c -s";
# STRIP is looked up with AC_CHECK_TOOL when $cross_compiling is not "no".
AC_DEFUN([AM_PROG_INSTALL_STRIP],
[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
# Installed binaries are usually stripped using `strip' when the user
# runs `make install-strip'. However `strip' might not be the right
# tool to use in cross-compilation environments, therefore Automake
# will honor the `STRIP' environment variable to overrule this program.
dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
if test "$cross_compiling" != no; then
AC_CHECK_TOOL([STRIP], [strip], :)
fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
# Copyright (C) 2006, 2008 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 2
# _AM_SUBST_NOTMAKE(VARIABLE)
# ---------------------------
# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
# This macro is traced by Automake.
# It is defined without a body, so it expands to nothing.
AC_DEFUN([_AM_SUBST_NOTMAKE])
# AM_SUBST_NOTMAKE(VARIABLE)
# ---------------------------
# Public sister of _AM_SUBST_NOTMAKE.
# All arguments are forwarded unchanged to _AM_SUBST_NOTMAKE.
AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 2
# _AM_PROG_TAR(FORMAT)
# --------------------
# Check how to create a tarball in format FORMAT.
# FORMAT should be one of `v7', `ustar', or `pax'.
#
# Substitute a variable $(am__tar) that is a command
# writing to stdout a FORMAT-tarball containing the directory
# $tardir.
# tardir=directory && $(am__tar) > result.tar
#
# Substitute a variable $(am__untar) that extract such
# a tarball read from stdin.
# $(am__untar) < result.tar
#
# For `v7' the commands are fixed and no probing is done.  For the other
# formats each candidate tool is tried on a scratch directory until one
# round-trips a file; the name of that tool is cached in
# am_cv_prog_tar_FORMAT.  am__tar_ is the variant of am__tar used for the
# probe itself (single "$" in front of tardir).
AC_DEFUN([_AM_PROG_TAR],
[# Always define AMTAR for backward compatibility.
AM_MISSING_PROG([AMTAR], [tar])
m4_if([$1], [v7],
[am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
[m4_case([$1], [ustar],, [pax],,
[m4_fatal([Unknown tar format])])
AC_MSG_CHECKING([how to create a $1 tar archive])
# Loop over all known methods to create a tar archive until one works.
_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
# Do not fold the above two lines into one, because Tru64 sh and
# Solaris sh will not grok spaces in the rhs of `-'.
for _am_tool in $_am_tools
do
case $_am_tool in
gnutar)
for _am_tar in tar gnutar gtar;
do
AM_RUN_LOG([$_am_tar --version]) && break
done
am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
am__untar="$_am_tar -xf -"
;;
plaintar)
# Must skip GNU tar: if it does not support --format= it doesn't create
# ustar tarball either.
(tar --version) >/dev/null 2>&1 && continue
am__tar='tar chf - "$$tardir"'
am__tar_='tar chf - "$tardir"'
am__untar='tar xf -'
;;
pax)
am__tar='pax -L -x $1 -w "$$tardir"'
am__tar_='pax -L -x $1 -w "$tardir"'
am__untar='pax -r'
;;
cpio)
am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
am__untar='cpio -i -H $1 -d'
;;
none)
am__tar=false
am__tar_=false
am__untar=false
;;
esac
# If the value was cached, stop now. We just wanted to have am__tar
# and am__untar set.
test -n "${am_cv_prog_tar_$1}" && break
# tar/untar a dummy directory, and stop if the command works
rm -rf conftest.dir
mkdir conftest.dir
echo GrepMe > conftest.dir/file
AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
rm -rf conftest.dir
if test -s conftest.tar; then
AM_RUN_LOG([$am__untar <conftest.tar])
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
fi
done
rm -rf conftest.dir
AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
AC_SUBST([am__tar])
AC_SUBST([am__untar])
]) # _AM_PROG_TAR
m4_include([m4/libtool.m4])
m4_include([m4/ltoptions.m4])
m4_include([m4/ltsugar.m4])
m4_include([m4/ltversion.m4])
m4_include([m4/lt~obsolete.m4])

143
src/Onigmo/compile Normal file
View File

@ -0,0 +1,143 @@
#! /bin/sh
# Wrapper for compilers which do not understand `-c -o'.
scriptversion=2009-10-06.20; # UTC
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009 Free Software
# Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# Handle the invocations that do not run a compiler: no argument at all
# (error, exit status 1), -h/--h* (usage text) and -v/--v* (version).
# Any other first argument falls through to the code below.
case $1 in
'')
echo "$0: No command. Try \`$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]
Wrapper for compilers which do not understand `-c -o'.
Remove `-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.
If you are trying to build a whole package this is not the
right script to run: please start by reading the file `INSTALL'.
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "compile $scriptversion"
exit $?
;;
esac
# ofile: object file named by a stripped `-o' option.
# cfile: the last argument seen that ends in `.c'.
# eat:   non-empty while the argument that follows `-o' must be skipped.
ofile=
cfile=
eat=
# Rebuild the positional parameters in place: every argument that is kept
# is appended to the end of "$@" and the argument just examined is shifted
# off the front, so after the loop "$@" holds the filtered command line.
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as `compile cc -o foo foo.c'.
# So we strip `-o arg' only if arg is an object.
eat=1
case $2 in
*.o | *.obj)
ofile=$2
;;
*)
set x "$@" -o "$2"
shift
;;
esac
;;
*.c)
cfile=$1
set x "$@" "$1"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -z "$ofile" || test -z "$cfile"; then
# If no `-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one. That is ok -- this is a
# normal compilation that the losing compiler can handle. If no
# `.c' file was seen then we are probably linking. That is also
# ok.
exec "$@"
fi
# Name of file we expect compiler to create.
# The directory part and a leading drive letter are removed from the
# source name and the `.c' suffix is replaced by `.o'.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
# Create the lock directory.
# Note: use `[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file. Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
# mkdir serves as the lock: retry once per second until it succeeds.
while true; do
if mkdir "$lockdir" >/dev/null 2>&1; then
break
fi
sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
# Remove the lock directory and exit with status 1 on signals 1, 2 and 15.
trap "rmdir '$lockdir'; exit 1" 1 2 15
# Run the compile.
"$@"
ret=$?
# Move the compiler output to the requested name.  The second test covers
# an output named like the expected file but ending in `.obj'.
if test -f "$cofile"; then
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi
rmdir "$lockdir"
# The exit status is the one returned by the compiler.
exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

1502
src/Onigmo/config.guess vendored Normal file

File diff suppressed because it is too large Load Diff

121
src/Onigmo/config.h.in Normal file
View File

@ -0,0 +1,121 @@
/* config.h.in. Generated from configure.in by autoheader. */
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
systems. This function is required for `alloca.c' support on those systems.
*/
#undef CRAY_STACKSEG_END
/* Define to 1 if using `alloca.c'. */
#undef C_ALLOCA
/* Define to 1 if you have `alloca', as a function or macro. */
#undef HAVE_ALLOCA
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
*/
#undef HAVE_ALLOCA_H
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define if compiler supports prototypes */
#undef HAVE_PROTOTYPES
/* Define if compiler supports stdarg prototypes */
#undef HAVE_STDARG_PROTOTYPES
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/times.h> header file. */
#undef HAVE_SYS_TIMES_H
/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
#undef NO_MINUS_C_MINUS_O
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* The size of `int', as computed by sizeof. */
#undef SIZEOF_INT
/* The size of `long', as computed by sizeof. */
#undef SIZEOF_LONG
/* The size of `short', as computed by sizeof. */
#undef SIZEOF_SHORT
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
automatically deduced at runtime.
STACK_DIRECTION > 0 => grows toward higher addresses
STACK_DIRECTION < 0 => grows toward lower addresses
STACK_DIRECTION = 0 => direction of growth unknown */
#undef STACK_DIRECTION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Define if combination explosion check */
#undef USE_COMBINATION_EXPLOSION_CHECK
/* Define if enable CR+NL as line terminator */
#undef USE_CRNL_AS_LINE_TERMINATOR
/* Version number of package */
#undef VERSION
/* Define to empty if `const' does not conform to ANSI C. */
#undef const

1714
src/Onigmo/config.sub vendored Normal file

File diff suppressed because it is too large Load Diff

13803
src/Onigmo/configure vendored Normal file

File diff suppressed because it is too large Load Diff

96
src/Onigmo/configure.in Normal file
View File

@ -0,0 +1,96 @@
dnl Process this file with autoconf to produce a configure script.
dnl Package name "onig", version 5.13.5; extra macros are read from m4/,
dnl Automake runs in "foreign" mode and the configuration header is config.h.
AC_INIT(onig, 5.13.5)
AC_CONFIG_MACRO_DIR([m4])
AM_INIT_AUTOMAKE(foreign)
AC_CONFIG_HEADER(config.h)
dnl default value for RUBYDIR
dnl RUBYDIR is replaced by the option argument whenever --with-rubydir
dnl appears on the command line, and is substituted into the output files.
RUBYDIR=".."
AC_ARG_WITH(rubydir,
[ --with-rubydir=RUBYDIR specify value for RUBYDIR (default ..)],
[ RUBYDIR=$withval ])
AC_SUBST(RUBYDIR)
dnl default value for STATISTICS
dnl The action of AC_ARG_WITH also runs for --without-statistics, with
dnl withval set to "no", so the define is only added for other values.
STATISTICS=""
AC_ARG_WITH(statistics,
[ --with-statistics take matching time statistical data],
[ if test "x$withval" != xno; then
    STATISTICS=-DONIG_DEBUG_STATISTICS
  fi ])
AC_SUBST(STATISTICS)
dnl check for COMBINATION_EXPLOSION
dnl comb_expl_check is only assigned when the option is given; it then
dnl holds the option argument, and the macro is defined only for "yes".
AC_ARG_ENABLE(combination-explosion-check,
[ --enable-combination-explosion-check enable combination explosion check],
[comb_expl_check=$enableval])
if test "${comb_expl_check}" = yes; then
AC_DEFINE(USE_COMBINATION_EXPLOSION_CHECK,1,[Define if combination explosion check])
fi
dnl check for CRNL_AS_LINE_TERMINATOR
dnl Same scheme as above, with crnl_as_line_terminator as the variable.
AC_ARG_ENABLE(crnl-as-line-terminator,
[ --enable-crnl-as-line-terminator enable CR+NL as line terminator],
[crnl_as_line_terminator=$enableval])
if test "${crnl_as_line_terminator}" = yes; then
AC_DEFINE(USE_CRNL_AS_LINE_TERMINATOR,1,[Define if enable CR+NL as line terminator])
fi
dnl Checks for programs.
AC_PROG_CC
AC_PROG_LIBTOOL
AM_PROG_CC_C_O
dnl LTVERSION is substituted into the output files.
dnl NOTE(review): presumably consumed as the libtool -version-info value
dnl in Makefile.am; confirm there.
LTVERSION="5:0:5"
AC_SUBST(LTVERSION)
AC_PROG_INSTALL
AC_PROG_MAKE_SET
dnl Checks for libraries.
dnl Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS(stdlib.h string.h strings.h sys/time.h unistd.h sys/times.h stdint.h)
dnl Checks for typedefs, structures, and compiler characteristics.
dnl NOTE(review): the second argument of AC_CHECK_SIZEOF is presumably the
dnl value assumed when cross-compiling; confirm for the Autoconf in use.
AC_CHECK_SIZEOF(int, 4)
AC_CHECK_SIZEOF(short, 2)
AC_CHECK_SIZEOF(long, 4)
AC_C_CONST
AC_HEADER_TIME
dnl Checks for library functions.
AC_FUNC_ALLOCA
AC_FUNC_MEMCMP
dnl Compile a function written with a prototype; the result is cached in
dnl _cv_have_prototypes and HAVE_PROTOTYPES is defined when it is "yes".
AC_CACHE_CHECK(for prototypes, _cv_have_prototypes,
[AC_TRY_COMPILE([int foo(int x) { return 0; }], [return foo(10);],
_cv_have_prototypes=yes,
_cv_have_prototypes=no)])
if test "$_cv_have_prototypes" = yes; then
AC_DEFINE(HAVE_PROTOTYPES,1,[Define if compiler supports prototypes])
fi
dnl Compile a variadic function that uses <stdarg.h>; the result is cached
dnl in _cv_stdarg and HAVE_STDARG_PROTOTYPES is defined when it is "yes".
AC_CACHE_CHECK(for variable length prototypes and stdarg.h, _cv_stdarg,
[AC_TRY_COMPILE([
#include <stdarg.h>
int foo(int x, ...) {
va_list va;
va_start(va, x);
va_arg(va, int);
va_arg(va, char *);
va_arg(va, double);
va_end(va);
return 0;
}
], [return foo(10, "", 3.14);],
_cv_stdarg=yes,
_cv_stdarg=no)])
if test "$_cv_stdarg" = yes; then
AC_DEFINE(HAVE_STDARG_PROTOTYPES,1,[Define if compiler supports stdarg prototypes])
fi
dnl Generate Makefile, onig-config and sample/Makefile; the extra command
dnl makes the generated onig-config executable.
AC_OUTPUT([Makefile onig-config sample/Makefile], [chmod +x onig-config])

630
src/Onigmo/depcomp Normal file
View File

@ -0,0 +1,630 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
scriptversion=2009-04-28.21; # UTC
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009 Free
# Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
# Handle the invocations that do not run a compiler: no argument at all
# (error, exit status 1), -h/--h* (usage text) and -v/--v* (version).
# Any other first argument falls through to the code below.
case $1 in
'')
echo "$0: No command. Try \`$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
Run PROGRAMS ARGS to compile a file, generating dependencies
as side-effects.
Environment variables:
depmode Dependency tracking mode.
source Source file read by `PROGRAMS ARGS'.
object Object file output by `PROGRAMS ARGS'.
DEPDIR directory where to store dependencies.
depfile Dependency file to output.
tmpdepfile Temporary file to use when outputing dependencies.
libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "depcomp $scriptversion"
exit $?
;;
esac
# depmode, source and object are mandatory; stop with status 1 otherwise.
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2
exit 1
fi
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
# A depfile value that is already set is kept; DEPDIR replaces the
# default directory name .deps when it is set.
depfile=${depfile-`echo "$object" |
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
# Unless already set, tmpdepfile is depfile with a T inserted in front of
# the extension (bar.Po becomes bar.TPo).  A stale copy is removed.
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
rm -f "$tmpdepfile"
# Some modes work just like other modes, but use different flags. We
# parameterize here, but still list the modes in the big case below,
# to make depend.m4 easier to write. Note that we *cannot* use a case
# here, because this file can only contain one case statement.
# Each alias rewrites depmode to the mode it shares code with and records
# the differing flag or filter in a helper variable.
if test "$depmode" = hp; then
# HP compiler uses -M and no extra arg.
gccflag=-M
depmode=gcc
fi
if test "$depmode" = dashXmstdout; then
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
fi
# NOTE(review): cygpath_u is presumably the path filter used by the
# msvisualcpp mode, which is outside this part of the file; confirm there.
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u="sed s,\\\\\\\\,/,g"
depmode=msvisualcpp
fi
case "$depmode" in
gcc3)
## gcc 3 implements dependency tracking that does exactly what
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
## it if -MD -MP comes after the -MF stuff. Hmm.
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
## the command line argument order; so add the flags where they
## appear in depend2.am. Note that the slowdown incurred here
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
for arg
do
case $arg in
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
*) set fnord "$@" "$arg" ;;
esac
shift # fnord
shift # $arg
done
"$@"
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
mv "$tmpdepfile" "$depfile"
;;
gcc)
## There are various ways to get dependency output from gcc. Here's
## why we pick this rather obscure method:
## - Don't want to use -MD because we'd like the dependencies to end
## up in a subdir. Having to rename by hand is ugly.
## (We might end up doing this anyway to support other compilers.)
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
## -MM, not -M (despite what the docs say).
## - Using -M directly means running the compiler twice (even worse
## than renaming).
if test -z "$gccflag"; then
gccflag=-MD,
fi
"$@" -Wp,"$gccflag$tmpdepfile"
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
## The second -e expression handles DOS-style file names with drive letters.
sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the `deleted header file' problem.
## The problem is that when a header file which appears in a .P file
## is deleted, the dependency causes make to die (because there is
## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly.
tr ' ' '
' < "$tmpdepfile" |
## Some versions of gcc put a space before the `:'. On the theory
## that the space means something, we add a space to the output as
## well.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
sgi)
if test "$libtool" = yes; then
"$@" "-Wp,-MDupdate,$tmpdepfile"
else
"$@" -MDupdate "$tmpdepfile"
fi
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
echo "$object : \\" > "$depfile"
# Clip off the initial element (the dependent). Don't try to be
# clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
# the IRIX cc adds comments like `#:fec' to the end of the
# dependency line.
tr ' ' '
' < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
tr '
' ' ' >> "$depfile"
echo >> "$depfile"
# The second pass generates a dummy entry for each header file.
tr ' ' '
' < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
else
# The sourcefile does not contain any dependencies, so just
# store a dummy comment line, to avoid errors with the Makefile
# "include basename.Plo" scheme.
echo "#dummy" > "$depfile"
fi
rm -f "$tmpdepfile"
;;
aix)
  # The C for AIX Compiler uses -M and outputs the dependencies
  # in a .u file.  In older versions, this file always lives in the
  # current directory.  Also, the AIX compiler puts `$object:' at the
  # start of each line; $object doesn't have directory information.
  # Version 6 uses the directory in both cases.
  # dir is the directory part of $object (empty when there is none) and
  # base is its file name without the .o or .lo suffix.
  dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
  test "x$dir" = "x$object" && dir=
  base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
  if test "$libtool" = yes; then
    tmpdepfile1=$dir$base.u
    tmpdepfile2=$base.u
    tmpdepfile3=$dir.libs/$base.u
    "$@" -Wc,-M
  else
    tmpdepfile1=$dir$base.u
    tmpdepfile2=$dir$base.u
    tmpdepfile3=$dir$base.u
    "$@" -M
  fi
  stat=$?
  if test $stat -eq 0; then :
  else
    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
    exit $stat
  fi
  # Use the first candidate file that exists.
  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
  do
    test -f "$tmpdepfile" && break
  done
  if test -f "$tmpdepfile"; then
    # Each line is of the form `foo.o: dependent.h'.
    # Do two passes, one to just change these to
    # `$object: dependent.h' and one to simply `dependent.h:'.
    sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
    # That's a tab and a space in the [].
    sed -e 's,^.*\.[a-z]*:[	 ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
  else
    # The sourcefile does not contain any dependencies, so just
    # store a dummy comment line, to avoid errors with the Makefile
    # "include basename.Plo" scheme.
    echo "#dummy" > "$depfile"
  fi
  rm -f "$tmpdepfile"
  ;;
icc)
# Intel's C compiler understands `-MD -MF file'. However on
# icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
# ICC 7.0 will fill foo.d with something like
# foo.o: sub/foo.c
# foo.o: sub/foo.h
# which is wrong. We want:
# sub/foo.o: sub/foo.c
# sub/foo.o: sub/foo.h
# sub/foo.c:
# sub/foo.h:
# ICC 7.1 will output
# foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using \ :
# foo.o: sub/foo.c ... \
# sub/foo.h ... \
# ...
"$@" -MD -MF "$tmpdepfile"
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each line is of the form `foo.o: dependent.h',
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
# Do two passes, one to just change these to
# `$object: dependent.h' and one to simply `dependent.h:'.
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp2)
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
# compilers, which have integrated preprocessors. The correct option
# to use with these is +Maked; it writes dependencies to a file named
# 'foo.d', which lands next to the object file, wherever that
# happens to be.
# Much of this is similar to the tru64 case; see comments there.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
test "x$dir" = "x$object" && dir=
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir.libs/$base.d
"$@" -Wc,+Maked
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
"$@" +Maked
fi
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile1" "$tmpdepfile2"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
# Add `dependent.h:' lines.
sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else
echo "#dummy" > "$depfile"
fi
rm -f "$tmpdepfile" "$tmpdepfile2"
;;
tru64)
  # The Tru64 compiler uses -MD to generate dependencies as a side
  # effect.  `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
  # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
  # dependencies in `foo.d' instead, so we check for that too.
  # Subdirectories are respected.
  # dir is the directory part of $object (empty when there is none) and
  # base is its file name without the .o or .lo suffix.
  dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
  test "x$dir" = "x$object" && dir=
  base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
  if test "$libtool" = yes; then
    # With Tru64 cc, shared objects can also be used to make a
    # static library.  This mechanism is used in libtool 1.4 series to
    # handle both shared and static libraries in a single compilation.
    # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
    #
    # With libtool 1.5 this exception was removed, and libtool now
    # generates 2 separate objects for the 2 libraries.  These two
    # compilations output dependencies in $dir.libs/$base.o.d and
    # in $dir$base.o.d.  We have to check for both files, because
    # one of the two compilations can be disabled.  We should prefer
    # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
    # automatically cleaned when .libs/ is deleted, while ignoring
    # the former would cause a distcleancheck panic.
    tmpdepfile1=$dir.libs/$base.lo.d   # libtool 1.4
    tmpdepfile2=$dir$base.o.d          # libtool 1.5
    tmpdepfile3=$dir.libs/$base.o.d    # libtool 1.5
    tmpdepfile4=$dir.libs/$base.d      # Compaq CCC V6.2-504
    "$@" -Wc,-MD
  else
    tmpdepfile1=$dir$base.o.d
    tmpdepfile2=$dir$base.d
    tmpdepfile3=$dir$base.d
    tmpdepfile4=$dir$base.d
    "$@" -MD
  fi
  stat=$?
  if test $stat -eq 0; then :
  else
    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
    exit $stat
  fi
  # Use the first candidate file that exists.
  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
  do
    test -f "$tmpdepfile" && break
  done
  if test -f "$tmpdepfile"; then
    sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
    # That's a tab and a space in the [].
    sed -e 's,^.*\.[a-z]*:[	 ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
  else
    echo "#dummy" > "$depfile"
  fi
  rm -f "$tmpdepfile"
  ;;
#nosideeffect)
# This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones.
dashmstdout)
# depmode "dashmstdout": compile normally, then run the compiler a second
# time with $dashmflag (default -M) and turn what it prints on stdout
# into "$depfile".
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout, regardless of -o.
# Run the real compilation first; give up with its exit status on failure.
"$@" || exit $?
# Remove the call to Libtool.
# Everything up to and including `--mode=compile' is shifted away.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove `-o $object'.
# The loop walks the original argument list once.  Each argument is taken
# off the front of "$@"; every argument other than `-o' and $object is
# appended again at the back, so the kept arguments end up in their
# original order.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
# `fnord' is a dummy first word for `set' (presumably so an argument
# starting with `-' is never taken as an option to `set'); it is
# shifted off again immediately.
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
# Default the dependency flag to -M when the caller did not provide one.
test -z "$dashmflag" && dashmflag=-M
# Require at least two characters before searching for `:'
# in the target name. This is to cope with DOS-style filenames:
# a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
"$@" $dashmflag |
sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
rm -f "$depfile"
# First copy the rewritten rule as is, then append one `file :' line per
# prerequisite: spaces become newlines, and lines that are a lone
# backslash, empty, or end in `:' are dropped before ` :' is added.
cat < "$tmpdepfile" > "$depfile"
tr ' ' '
' < "$tmpdepfile" | \
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
dashXmstdout)
# This case only exists to satisfy depend.m4. It is never actually
# run, as this mode is specially recognized in the preamble.
exit 1
;;
makedepend)
# depmode "makedepend": compile normally, then run ${MAKEDEPEND-makedepend}
# on a filtered argument list and convert its output into "$depfile".
# Run the real compilation first; give up with its exit status on failure.
"$@" || exit $?
# Remove any Libtool call
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# X makedepend
# Drop the first remaining word (presumably the compiler command name);
# makedepend is invoked in its place below.
shift
cleared=no eat=no
# Rebuild "$@" while iterating over the original list: on the first
# iteration the positional parameters are emptied, afterwards only the
# arguments selected below are appended again.
for arg
do
case $cleared in
no)
# `set ""; shift' leaves an empty argument list.
set ""; shift
cleared=yes ;;
esac
# $eat is set by `-arch' below: skip the word that follows that option.
if test $eat = yes; then
eat=no
continue
fi
case "$arg" in
-D*|-I*)
# Keep macro definitions and include paths.  `fnord' is a dummy first
# word for `set' and is shifted off right away.
set fnord "$@" "$arg"; shift ;;
# Strip any option that makedepend may not understand. Remove
# the object too, otherwise makedepend will parse it as a source file.
-arch)
eat=yes ;;
-*|$object)
;;
*)
set fnord "$@" "$arg"; shift ;;
esac
done
# Object suffix: everything from the last `.' of $object (e.g. `.o').
obj_suffix=`echo "$object" | sed 's/^.*\././'`
# Create the output file before makedepend is told to write to it via -f.
touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile"
# Copy makedepend's output as is, then append one `file :' line per word
# of that output after its first two lines have been deleted.
cat < "$tmpdepfile" > "$depfile"
sed '1,2d' "$tmpdepfile" | tr ' ' '
' | \
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
# Also remove the `.bak' file (presumably left behind by makedepend).
rm -f "$tmpdepfile" "$tmpdepfile".bak
;;
cpp)
# depmode "cpp": compile normally, then run the compiler again with -E and
# collect the file names found in the line markers of the preprocessed
# output.
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
# Run the real compilation first; give up with its exit status on failure.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove `-o $object'.
# Each argument is taken off the front of "$@"; every argument other than
# `-o' and $object is appended again at the back, preserving order.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
# `fnord' is a dummy first word for `set'; it is shifted off immediately.
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
# Turn every `# N "file"' / `#line N "file"' marker into a ` file \'
# continuation line; the last sed strips the trailing ` \' from the final
# line.
"$@" -E |
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
sed '$ s: \\$::' > "$tmpdepfile"
rm -f "$depfile"
# Emit `$object : \' followed by the collected file list, then one
# `file :' line for each collected file.
echo "$object : \\" > "$depfile"
cat < "$tmpdepfile" >> "$depfile"
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvisualcpp)
# depmode "msvisualcpp": compile normally, then run the compiler again
# with -E and collect the file names from its `#line' directives.
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
# Run the real compilation first; give up with its exit status on failure.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Filter the argument list: `-o', $object and the -Gm/-Gi/-ZI options (in
# both `-' and `/' spellings) are dropped; every other argument is moved
# from the front of "$@" to the back, preserving order.
IFS=" "
for arg
do
case "$arg" in
-o)
shift
;;
$object)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
# Same net effect as a single `shift': the option is discarded.
set fnord "$@"
shift
shift
;;
*)
# `fnord' is a dummy first word for `set'; it is shifted off immediately.
set fnord "$@" "$arg"
shift
shift
;;
esac
done
# Extract the quoted file name of each `#line' directive, pass the names
# through $cygpath_u (set elsewhere in this script; presumably a path
# converter -- confirm), and keep each name once.
"$@" -E 2>/dev/null |
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
rm -f "$depfile"
# Emit `$object : \', then every file name (spaces backslash-escaped) as a
# continuation line, a separator line, and finally one `file:' line per
# file name.
echo "$object : \\" > "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
echo " " >> "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvcmsys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
none)
exec "$@"
;;
*)
echo "Unknown depmode $depmode" 1>&2
exit 1
;;
esac
exit 0
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

620
src/Onigmo/doc/API Normal file
View File

@ -0,0 +1,620 @@
Onigmo (Oniguruma-mod) API Version 5.11.4 2011/10/08
#include <oniguruma.h>
# int onig_init(void)
Initialize library.
You don't have to call it explicitly, because it is called in onig_new().
# int onig_error_code_to_str(UChar* err_buf, OnigPosition err_code, ...)
Get error message string.
If this function is used for onig_new(),
don't call this after the pattern argument of onig_new() is freed.
normal return: error message string length
arguments
1 err_buf: error message string buffer.
(required size: ONIG_MAX_ERROR_MESSAGE_LEN)
2 err_code: error code returned by other API functions.
3 err_info (optional): error info returned by onig_new().
# void onig_set_warn_func(OnigWarnFunc func)
Set warning function.
WARNING:
'[', '-', ']' in character class without escape.
']' in pattern without escape.
arguments
1 func: function pointer. void (*func)(char* warning_message)
# void onig_set_verb_warn_func(OnigWarnFunc func)
Set verbose warning function.
WARNING:
redundant nested repeat operator.
arguments
1 func: function pointer. void (*func)(char* warning_message)
# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
OnigErrorInfo* err_info)
Create a regex object.
normal return: ONIG_NORMAL
arguments
1 reg: return regex object's address.
2 pattern: regex pattern string.
3 pattern_end: terminate address of pattern. (pattern + pattern length)
4 option: compile time options.
ONIG_OPTION_NONE no option
ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z'
ONIG_OPTION_DOTALL '.' match with newline
ONIG_OPTION_MULTILINE same as ONIG_OPTION_DOTALL
ONIG_OPTION_IGNORECASE case-insensitive (ambiguity) match on
ONIG_OPTION_EXTEND extended pattern form
ONIG_OPTION_FIND_LONGEST find longest match
ONIG_OPTION_FIND_NOT_EMPTY ignore empty match
ONIG_OPTION_NEGATE_SINGLELINE
clear ONIG_OPTION_SINGLELINE which is enabled on
ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL58, ONIG_SYNTAX_PERL58_NG,
ONIG_SYNTAX_JAVA, ONIG_SYNTAX_PYTHON
ONIG_OPTION_DONT_CAPTURE_GROUP only named groups are captured.
ONIG_OPTION_CAPTURE_GROUP named and unnamed groups are captured.
ONIG_OPTION_NEWLINE_CRLF
Treat CR+LF as a newline too. (default: LF only)
To use this option, you must enable the following line in regenc.h.
/* #define USE_CRNL_AS_LINE_TERMINATOR */
5 enc: character encoding.
ONIG_ENCODING_ASCII ASCII
ONIG_ENCODING_ISO_8859_1 ISO 8859-1
ONIG_ENCODING_ISO_8859_2 ISO 8859-2
ONIG_ENCODING_ISO_8859_3 ISO 8859-3
ONIG_ENCODING_ISO_8859_4 ISO 8859-4
ONIG_ENCODING_ISO_8859_5 ISO 8859-5
ONIG_ENCODING_ISO_8859_6 ISO 8859-6
ONIG_ENCODING_ISO_8859_7 ISO 8859-7
ONIG_ENCODING_ISO_8859_8 ISO 8859-8
ONIG_ENCODING_ISO_8859_9 ISO 8859-9
ONIG_ENCODING_ISO_8859_10 ISO 8859-10
ONIG_ENCODING_ISO_8859_11 ISO 8859-11
ONIG_ENCODING_ISO_8859_13 ISO 8859-13
ONIG_ENCODING_ISO_8859_14 ISO 8859-14
ONIG_ENCODING_ISO_8859_15 ISO 8859-15
ONIG_ENCODING_ISO_8859_16 ISO 8859-16
ONIG_ENCODING_UTF8 UTF-8
ONIG_ENCODING_UTF16_BE UTF-16BE
ONIG_ENCODING_UTF16_LE UTF-16LE
ONIG_ENCODING_UTF32_BE UTF-32BE
ONIG_ENCODING_UTF32_LE UTF-32LE
ONIG_ENCODING_EUC_JP EUC-JP
ONIG_ENCODING_EUC_TW EUC-TW
ONIG_ENCODING_EUC_KR EUC-KR
ONIG_ENCODING_EUC_CN EUC-CN
ONIG_ENCODING_SJIS Shift_JIS
ONIG_ENCODING_KOI8_R KOI8-R
ONIG_ENCODING_CP1251 CP1251
ONIG_ENCODING_BIG5 Big5
ONIG_ENCODING_GB18030 GB18030
or any OnigEncodingType data address defined by user.
6 syntax: address of pattern syntax definition.
ONIG_SYNTAX_ASIS plain text
ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE
ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
ONIG_SYNTAX_EMACS Emacs
ONIG_SYNTAX_GREP grep
ONIG_SYNTAX_GNU_REGEX GNU regex
ONIG_SYNTAX_JAVA Java (Sun java.util.regex)
ONIG_SYNTAX_PERL58 Perl 5.8
ONIG_SYNTAX_PERL58_NG Perl 5.8 + named group
ONIG_SYNTAX_PERL Perl 5.10+
ONIG_SYNTAX_PYTHON Python
ONIG_SYNTAX_RUBY Ruby
ONIG_SYNTAX_DEFAULT default (== Ruby)
onig_set_default_syntax()
or any OnigSyntaxType data address defined by user.
7 err_info: address for return optional error info.
Use this value as 3rd argument of onig_error_code_to_str().
# int onig_new_without_alloc(regex_t* reg, const UChar* pattern,
const UChar* pattern_end,
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
OnigErrorInfo* err_info)
Create a regex object.
reg object area is not allocated in this function.
normal return: ONIG_NORMAL
# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
OnigCompileInfo* ci, OnigErrorInfo* einfo)
Create a regex object.
This function is deluxe version of onig_new().
normal return: ONIG_NORMAL
arguments
1 reg: return address of regex object.
2 pattern: regex pattern string.
3 pattern_end: terminate address of pattern. (pattern + pattern length)
4 ci: compile time info.
ci->num_of_elements: number of elements in ci. (current version: 5)
ci->pattern_enc: pattern string character encoding.
ci->target_enc: target string character encoding.
ci->syntax: address of pattern syntax definition.
ci->option: compile time option.
ci->case_fold_flag: character matching case fold bit flag for
ONIG_OPTION_IGNORECASE mode.
ONIGENC_CASE_FOLD_MIN: minimum
ONIGENC_CASE_FOLD_DEFAULT: minimum
onig_set_default_case_fold_flag()
5 einfo: address for returning optional error info.
Use this value as 3rd argument of onig_error_code_to_str().
Using different character encodings for the pattern and the target
is allowed only in the following combinations.
pattern_enc: ASCII, ISO_8859_1
target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
pattern_enc: UTF16_BE/LE
target_enc: UTF16_LE/BE
pattern_enc: UTF32_BE/LE
target_enc: UTF32_LE/BE
# void onig_free(regex_t* reg)
Free memory used by regex object.
arguments
1 reg: regex object.
# void onig_free_body(regex_t* reg)
Free memory used by regex object. (Except reg itself.)
arguments
1 reg: regex object.
# OnigPosition onig_search(regex_t* reg, const UChar* str, const UChar* end,
const UChar* start, const UChar* range, OnigRegion* region,
OnigOptionType option)
# OnigPosition onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
const UChar* global_pos,
const UChar* start, const UChar* range, OnigRegion* region,
OnigOptionType option)
Search string and return search result and matching region.
normal return: match position offset (i.e. p - str >= 0)
not found: ONIG_MISMATCH (< 0)
arguments
1 reg: regex object
2 str: target string
3 end: terminate address of target string
4 global_pos: position of \G
If not needed, set the same value as str.
5 start: search start address of target string
6 range: search terminate address of target string
in forward search (start <= searched string < range)
in backward search (range <= searched string <= start)
7 region: address for return group match range info (NULL is allowed)
8 option: search time option
ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line
ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line
ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API.
# OnigPosition onig_match(regex_t* reg, const UChar* str, const UChar* end,
const UChar* at, OnigRegion* region, OnigOptionType option)
Match string and return result and matching region.
normal return: match length (>= 0)
not match: ONIG_MISMATCH ( < 0)
arguments
1 reg: regex object
2 str: target string
3 end: terminate address of target string
4 at: match address of target string
5 region: address for return group match range info (NULL is allowed)
6 option: search time option
ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line
ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line
ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API.
# OnigRegion* onig_region_new(void)
Create a region.
# void onig_region_free(OnigRegion* region, int free_self)
Free memory used by region.
arguments
1 region: target region
2 free_self: [1: free all, 0: free memory used in region but not self]
# void onig_region_copy(OnigRegion* to, OnigRegion* from)
Copy contents of region.
arguments
1 to: target region
2 from: source region
# void onig_region_clear(OnigRegion* region)
Clear contents of region.
arguments
1 region: target region
# int onig_region_resize(OnigRegion* region, int n)
Resize group range area of region.
normal return: ONIG_NORMAL
arguments
1 region: target region
2 n: new size
# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
int** num_list)
Return the group number list of the name.
Named subexp is defined by (?<name>....).
normal return: number of groups for the name.
(ex. /(?<x>..)(?<x>..)/ ==> 2)
name not found: -1
arguments
1 reg: regex object.
2 name: group name.
3 name_end: terminate address of group name.
4 num_list: return list of group number.
# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
OnigRegion *region)
Return the group number corresponding to the named backref (\k<name>).
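If two or more groups of the name have an effective (matched) region,
the greatest group number among them is returned.
If the name has only one group, its number is returned regardless of
whether its region is effective. (So region may be NULL in that case.)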
normal return: group number.
arguments
1 reg: regex object.
2 name: group name.
3 name_end: terminate address of group name.
4 region: search/match result region.
# int onig_foreach_name(regex_t* reg,
int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
void* arg)
Iterate function call for all names.
normal return: 0
error: func's return value.
arguments
1 reg: regex object.
2 func: callback function.
func(name, name_end, <number of groups>, <group number's list>,
reg, arg);
if func does not return 0, then iteration is stopped.
3 arg: argument for func.
# int onig_number_of_names(regex_t* reg)
Return the number of names defined in the pattern.
Multiple definitions of one name are counted as one.
arguments
1 reg: regex object.
# OnigEncoding onig_get_encoding(regex_t* reg)
# OnigOptionType onig_get_options(regex_t* reg)
# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg)
# OnigSyntaxType* onig_get_syntax(regex_t* reg)
Return a value of the regex object.
arguments
1 reg: regex object.
# int onig_number_of_captures(regex_t* reg)
Return the number of capture groups in the pattern.
arguments
1 reg: regex object.
# int onig_number_of_capture_histories(regex_t* reg)
Return the number of capture history defined in the pattern.
You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
is disabled in the pattern syntax. (It is disabled in the default syntax.)
arguments
1 reg: regex object.
# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
Return the root node of capture history data tree.
This value is undefined if matching has failed.
arguments
1 region: matching result.
# int onig_capture_tree_traverse(OnigRegion* region, int at,
int(*func)(int,OnigPosition,OnigPosition,int,int,void*),
void* arg)
Traverse and callback in capture history data tree.
normal return: 0
error: callback func's return value.
arguments
1 region: match region data.
2 at: callback position.
ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children.
ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback.
ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children,
and at last callback again.
3 func: callback function.
if func does not return 0, then traverse is stopped.
int func(int group, OnigPosition beg, OnigPosition end,
int level, int at, void* arg)
group: group number
beg: capture start position
end: capture end position
level: nest level (from 0)
at: callback position
ONIG_TRAVERSE_CALLBACK_AT_FIRST
ONIG_TRAVERSE_CALLBACK_AT_LAST
arg: optional callback argument
4 arg: optional callback argument.
# int onig_noname_group_capture_is_active(regex_t* reg)
Return noname group capture activity.
active: 1
inactive: 0
arguments
1 reg: regex object.
if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON
--> inactive
if the regex pattern have named group
and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON
and option ONIG_OPTION_CAPTURE_GROUP == OFF
--> inactive
else --> active
# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
Return previous character head address.
arguments
1 enc: character encoding
2 start: string address
3 s: target address of string
# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
const UChar* start, const UChar* s)
Return left-adjusted head address of a character.
arguments
1 enc: character encoding
2 start: string address
3 s: target address of string
# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
const UChar* start, const UChar* s)
Return right-adjusted head address of a character.
arguments
1 enc: character encoding
2 start: string address
3 s: target address of string
# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
Return number of characters in the string.
# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
Return number of bytes in the string.
# int onig_set_default_syntax(OnigSyntaxType* syntax)
Set default syntax.
arguments
1 syntax: address of pattern syntax definition.
# void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from)
Copy syntax.
arguments
1 to: destination address.
2 from: source address.
# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
Get/Set elements of the syntax.
arguments
1 syntax: syntax
2 op, op2, behavior, options: value of element.
# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
Copy encoding.
arguments
1 to: destination address.
2 from: source address.
# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what,
OnigCodePoint code)
Set a variable meta character to the code point value.
Except for the escape character, the meta characters specified here
have no effect unless ONIG_SYN_OP_VARIABLE_META_CHARACTERS is enabled
in the syntax. (It is not enabled in the built-in syntaxes.)
normal return: ONIG_NORMAL
arguments
1 syntax: target syntax
2 what: specifies which meta character it is.
ONIG_META_CHAR_ESCAPE
ONIG_META_CHAR_ANYCHAR
ONIG_META_CHAR_ANYTIME
ONIG_META_CHAR_ZERO_OR_ONE_TIME
ONIG_META_CHAR_ONE_OR_MORE_TIME
ONIG_META_CHAR_ANYCHAR_ANYTIME
3 code: meta character or ONIG_INEFFECTIVE_META_CHAR.
# OnigCaseFoldType onig_get_default_case_fold_flag()
Get default case fold flag.
# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Set default case fold flag.
1 case_fold_flag: case fold flag
# unsigned int onig_get_match_stack_limit_size(void)
Return the maximum number of stack size.
(default: 0 == unlimited)
# int onig_set_match_stack_limit_size(unsigned int size)
Set the maximum number of stack size.
(size = 0: unlimited)
normal return: ONIG_NORMAL
# int onig_end(void)
The use of this library is finished.
normal return: ONIG_NORMAL
It is not allowed to use regex objects which were created
before the onig_end() call.
# const char* onig_version(void)
Return version string. (ex. "5.0.3")
// END

628
src/Onigmo/doc/API.ja Normal file
View File

@ -0,0 +1,628 @@
Onigmo インターフェース Version 5.11.4 2011/10/08
#include <oniguruma.h>
# int onig_init(void)
ライブラリの初期化
onig_new()の中で呼び出されるので、この関数を明示的に呼び出さなくてもよい。
# int onig_error_code_to_str(UChar* err_buf, OnigPosition err_code, ...)
エラーメッセージを取得する。
この関数を、onig_new()の結果に対して呼び出す場合には、onig_new()のpattern引数を
メモリ解放するよりも前に呼び出さなければならない。
正常終了戻り値: エラーメッセージ文字列のバイト長
引数
1 err_buf: エラーメッセージを格納する領域
(必要なサイズ: ONIG_MAX_ERROR_MESSAGE_LEN)
2 err_code: エラーコード
3 err_info (optional): onig_new()のerr_info
# void onig_set_warn_func(OnigWarnFunc func)
警告通知関数をセットする。
警告:
'[', '-', ']' in character class without escape.
']' in pattern without escape.
引数
1 func: 警告関数 void (*func)(char* warning_message)
# void onig_set_verb_warn_func(OnigWarnFunc func)
詳細警告通知関数をセットする。
詳細警告:
redundant nested repeat operator.
引数
1 func: 詳細警告関数 void (*func)(char* warning_message)
# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
OnigErrorInfo* err_info)
正規表現オブジェクト(regex)を作成する。
正常終了戻り値: ONIG_NORMAL
引数
1 reg: 作成された正規表現オブジェクトを返すアドレス
2 pattern: 正規表現パターン文字列
3 pattern_end: 正規表現パターン文字列の終端アドレス(pattern + pattern length)
4 option: 正規表現コンパイル時オプション
ONIG_OPTION_NONE オプションなし
ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z'
ONIG_OPTION_DOTALL '.'が改行にマッチする
ONIG_OPTION_MULTILINE ONIG_OPTION_DOTALLと同じ
ONIG_OPTION_IGNORECASE 曖昧マッチ オン
ONIG_OPTION_EXTEND パターン拡張形式
ONIG_OPTION_FIND_LONGEST 最長マッチ
ONIG_OPTION_FIND_NOT_EMPTY 空マッチを無視
ONIG_OPTION_NEGATE_SINGLELINE
ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL58, ONIG_SYNTAX_PERL58_NG,
ONIG_SYNTAX_JAVA, ONIG_SYNTAX_PYTHONで
デフォルトで有効なONIG_OPTION_SINGLELINEをクリアする。
ONIG_OPTION_DONT_CAPTURE_GROUP 名前付き捕獲式集合のみ捕獲
ONIG_OPTION_CAPTURE_GROUP 名前無し捕獲式集合も捕獲
ONIG_OPTION_NEWLINE_CRLF
CR+LFも改行として扱う。(デフォルトではLFのみ。)
この機能を使うには、regenc.hの以下の行を有効にする必要がある。
/* #define USE_CRNL_AS_LINE_TERMINATOR */
5 enc: 文字エンコーディング
ONIG_ENCODING_ASCII ASCII
ONIG_ENCODING_ISO_8859_1 ISO 8859-1
ONIG_ENCODING_ISO_8859_2 ISO 8859-2
ONIG_ENCODING_ISO_8859_3 ISO 8859-3
ONIG_ENCODING_ISO_8859_4 ISO 8859-4
ONIG_ENCODING_ISO_8859_5 ISO 8859-5
ONIG_ENCODING_ISO_8859_6 ISO 8859-6
ONIG_ENCODING_ISO_8859_7 ISO 8859-7
ONIG_ENCODING_ISO_8859_8 ISO 8859-8
ONIG_ENCODING_ISO_8859_9 ISO 8859-9
ONIG_ENCODING_ISO_8859_10 ISO 8859-10
ONIG_ENCODING_ISO_8859_11 ISO 8859-11
ONIG_ENCODING_ISO_8859_13 ISO 8859-13
ONIG_ENCODING_ISO_8859_14 ISO 8859-14
ONIG_ENCODING_ISO_8859_15 ISO 8859-15
ONIG_ENCODING_ISO_8859_16 ISO 8859-16
ONIG_ENCODING_UTF8 UTF-8
ONIG_ENCODING_UTF16_BE UTF-16BE
ONIG_ENCODING_UTF16_LE UTF-16LE
ONIG_ENCODING_UTF32_BE UTF-32BE
ONIG_ENCODING_UTF32_LE UTF-32LE
ONIG_ENCODING_EUC_JP EUC-JP
ONIG_ENCODING_EUC_TW EUC-TW
ONIG_ENCODING_EUC_KR EUC-KR
ONIG_ENCODING_EUC_CN EUC-CN
ONIG_ENCODING_SJIS Shift_JIS
ONIG_ENCODING_KOI8_R KOI8-R
ONIG_ENCODING_CP1251 CP1251
ONIG_ENCODING_BIG5 Big5
ONIG_ENCODING_GB18030 GB18030
または、ユーザが定義したOnigEncodingTypeデータのアドレス
6 syntax: 正規表現パターン文法定義
ONIG_SYNTAX_ASIS plain text
ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE
ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
ONIG_SYNTAX_EMACS Emacs
ONIG_SYNTAX_GREP grep
ONIG_SYNTAX_GNU_REGEX GNU regex
ONIG_SYNTAX_JAVA Java (Sun java.util.regex)
ONIG_SYNTAX_PERL58 Perl 5.8
ONIG_SYNTAX_PERL58_NG Perl 5.8 + 名前付き捕獲式集合
ONIG_SYNTAX_PERL Perl 5.10以降
ONIG_SYNTAX_PYTHON Python
ONIG_SYNTAX_RUBY Ruby
ONIG_SYNTAX_DEFAULT default (== Ruby)
onig_set_default_syntax()
または、ユーザが定義したOnigSyntaxTypeデータのアドレス
7 err_info: エラー情報を返すためのアドレス
onig_error_code_to_str()の三番目の引数として使用する
# int onig_new_without_alloc(regex_t* reg, const UChar* pattern,
const UChar* pattern_end,
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
OnigErrorInfo* err_info)
正規表現オブジェクト(regex)を作成する。
regの領域を内部で割り当てない。
正常終了戻り値: ONIG_NORMAL
# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
OnigCompileInfo* ci, OnigErrorInfo* einfo)
正規表現オブジェクト(regex)を作成する。
この関数は、onig_new()のデラックス版。
正常終了戻り値: ONIG_NORMAL
引数
1 reg: 作成された正規表現オブジェクトを返すアドレス
2 pattern: 正規表現パターン文字列
3 pattern_end: 正規表現パターン文字列の終端アドレス(pattern + pattern length)
4 ci: コンパイル情報
ci->num_of_elements: ciの要素数 (現在の版では: 5)
ci->pattern_enc: パターン文字列の文字エンコーディング
ci->target_enc: 対象文字列の文字エンコーディング
ci->syntax: 正規表現パターン文法定義
ci->option: 正規表現コンパイル時オプション
ci->case_fold_flag: ONIG_OPTION_IGNORECASEモードでの
文字曖昧マッチ指定ビットフラグ
ONIGENC_CASE_FOLD_MIN: 最小
ONIGENC_CASE_FOLD_DEFAULT: 最小
onig_set_default_case_fold_flag()
5 err_info: エラー情報を返すためのアドレス
onig_error_code_to_str()の三番目の引数として使用する
異なる文字エンコーディングの組み合わせは、以下の場合にのみ許される。
pattern_enc: ASCII, ISO_8859_1
target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
pattern_enc: UTF16_BE/LE
target_enc: UTF16_LE/BE
pattern_enc: UTF32_BE/LE
target_enc: UTF32_LE/BE
# void onig_free(regex_t* reg)
正規表現オブジェクトのメモリを解放する。
引数
1 reg: 正規表現オブジェクト
# void onig_free_body(regex_t* reg)
正規表現オブジェクトのメモリを解放する。(reg自身の領域を除いて)
引数
1 reg: 正規表現オブジェクト
# OnigPosition onig_search(regex_t* reg, const UChar* str, const UChar* end,
const UChar* start, const UChar* range, OnigRegion* region,
OnigOptionType option)
# OnigPosition onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
const UChar* global_pos,
const UChar* start, const UChar* range, OnigRegion* region,
OnigOptionType option)
正規表現で文字列を検索し、検索結果とマッチ領域を返す。
正常終了戻り値: マッチ位置 (p - str >= 0)
検索失敗: ONIG_MISMATCH (< 0)
引数
1 reg: 正規表現オブジェクト
2 str: 検索対象文字列
3 end: 検索対象文字列の終端アドレス
4 global_pos: \Gのアドレス
不要時はstrと同じ値を設定すること
5 start: 検索対象文字列の検索先頭位置アドレス
6 range: 検索対象文字列の検索終了位置アドレス
前方探索 (start <= 探索される文字列 < range)
後方探索 (range <= 探索される文字列 <= start)
7 region: マッチ領域情報(region) (NULLも許される)
8 option: 検索時オプション
ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない
ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない
ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする
# OnigPosition onig_match(regex_t* reg, const UChar* str, const UChar* end,
const UChar* at, OnigRegion* region, OnigOptionType option)
文字列の指定位置でマッチングを行い、結果とマッチ領域を返す。
正常終了戻り値: マッチしたバイト長 (>= 0)
not match: ONIG_MISMATCH ( < 0)
引数
1 reg: 正規表現オブジェクト
2 str: 検索対象文字列
3 end: 検索対象文字列の終端アドレス
4 at: 検索対象文字列の検索アドレス
5 region: マッチ領域情報(region) (NULLも許される)
6 option: 検索時オプション
ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない
ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない
ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする
# OnigRegion* onig_region_new(void)
マッチ領域情報(region)を作成する。
# void onig_region_free(OnigRegion* region, int free_self)
マッチ領域情報(region)で使用されているメモリを解放する。
引数
1 region: マッチ領域情報オブジェクト
2 free_self: [1: region自身を含めて全て解放, 0: region自身は解放しない]
# void onig_region_copy(OnigRegion* to, OnigRegion* from)
マッチ領域情報(region)を複製する。
引数
1 to: 対象領域
2 from: 元領域
# void onig_region_clear(OnigRegion* region)
マッチ領域情報(region)の中味をクリアする。
引数
1 region: 対象領域
# int onig_region_resize(OnigRegion* region, int n)
マッチ領域情報(region)の捕獲式集合(グループ)数を変更する。
正常終了戻り値: ONIG_NORMAL
引数
1 region: 対象領域
2 n: 新しいサイズ
# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
int** num_list)
指定した名前に対する名前付き捕獲式集合(グループ)の
グループ番号リストを返す。
名前付き捕獲式集合は、(?<name>....)によって定義できる。
正常終了戻り値: 指定された名前に対するグループ数
(例 /(?<x>..)(?<x>..)/ ==> 2)
名前に対するグループが存在しない: -1
引数
1 reg: 正規表現オブジェクト
2 name: 捕獲式集合(グループ)名
3 name_end: 捕獲式集合(グループ)名の終端アドレス
4 num_list: 番号リストを返すアドレス
# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
OnigRegion *region)
指定された名前の後方参照(\k<name>)に対する捕獲式集合(グループ)の番号を返す。
名前に対して、複数のマッチ領域が有効であれば、その中の最大の番号を返す。
名前に対する捕獲式集合が一個しかないときには、対応するマッチ領域が有効か
どうかに関係なく、その番号を返す。(従って、regionにはNULLを渡してもよい。)
正常終了戻り値: 番号
引数
1 reg: 正規表現オブジェクト
2 name: 捕獲式集合(グループ)名
3 name_end: 捕獲式集合(グループ)名の終端アドレス
4 region: search/match結果のマッチ領域
# int onig_foreach_name(regex_t* reg,
int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
void* arg)
全ての名前に対してコールバック関数呼び出しを実行する。
正常終了戻り値: 0
エラー: コールバック関数の戻り値
引数
1 reg: 正規表現オブジェクト
2 func: コールバック関数
func(name, name_end, <number of groups>, <group number's list>,
reg, arg);
funcが0以外の値を返すと、それ以降のコールバックは行なわずに
終了する。
3 arg: funcに対する追加引数
# int onig_number_of_names(regex_t* reg)
パターン中で定義された名前の数を返す。
一個の名前の多重定義は一個と看做す。
引数
1 reg: 正規表現オブジェクト
# OnigEncoding onig_get_encoding(regex_t* reg)
# OnigOptionType onig_get_options(regex_t* reg)
# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg)
# OnigSyntaxType* onig_get_syntax(regex_t* reg)
正規表現オブジェクトに対して、対応する値を返す。
引数
1 reg: 正規表現オブジェクト
# int onig_number_of_captures(regex_t* reg)
パターン中で定義された捕獲グループの数を返す。
引数
1 reg: 正規表現オブジェクト
# int onig_number_of_capture_histories(regex_t* reg)
パターン中で定義された捕獲履歴(?@...)の数を返す。
使用する文法で捕獲履歴機能が有効(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)
でなければ、捕獲履歴機能は使用できない。
引数
1 reg: 正規表現オブジェクト
# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
捕獲履歴データのルートノードを返す。
マッチが失敗している場合には、この値は不定である。
引数
1 region: マッチ領域
# int onig_capture_tree_traverse(OnigRegion* region, int at,
int(*func)(int,OnigPosition,OnigPosition,int,int,void*),
void* arg)
捕獲履歴データ木を巡回してコールバックする。
正常終了戻り値: 0
エラー: コールバック関数の戻り値
引数
1 region: マッチ領域
2 at: コールバックを行なうタイミング
ONIG_TRAVERSE_CALLBACK_AT_FIRST:
最初にコールバックして、子ノードを巡回
ONIG_TRAVERSE_CALLBACK_AT_LAST:
子ノードを巡回して、コールバック
ONIG_TRAVERSE_CALLBACK_AT_BOTH:
最初にコールバックして、子ノードを巡回、最後にもう一度コールバック
3 func: コールバック関数
funcが0以外の値を返すと、それ以降の巡回は行なわずに
終了する。
int func(int group, OnigPosition beg, OnigPosition end,
int level, int at, void* arg)
group: グループ番号
beg: マッチ開始位置
end: マッチ終了位置
level: ネストレベル (0から)
at: コールバックが呼び出されたタイミング
ONIG_TRAVERSE_CALLBACK_AT_FIRST
ONIG_TRAVERSE_CALLBACK_AT_LAST
arg: 追加引数
4 arg: funcに対する追加引数
# int onig_noname_group_capture_is_active(regex_t* reg)
名前なし式集合の捕獲機能が有効かどうかを返す。
有効: 1
無効: 0
引数
1 reg: 正規表現オブジェクト
オプションのONIG_OPTION_DONT_CAPTURE_GROUPがON --> 無効
パターンが名前つき式集合を使用している
AND 使用文法で、ONIG_SYN_CAPTURE_ONLY_NAMED_GROUPがON
AND オプションのONIG_OPTION_CAPTURE_GROUPがOFF
--> 無効
上記以外の場合 --> 有効
# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
文字一個分前の文字列位置を返す。
引数
1 enc: 文字エンコーディング
2 start: 文字列の先頭アドレス
3 s: 文字列中の位置
# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
const UChar* start, const UChar* s)
文字の先頭バイト位置になるように左側に調整したアドレスを返す。
引数
1 enc: 文字エンコーディング
2 start: 文字列の先頭アドレス
3 s: 文字列中の位置
# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
const UChar* start, const UChar* s)
文字の先頭バイト位置になるように右側に調整したアドレスを返す。
引数
1 enc: 文字エンコーディング
2 start: 文字列の先頭アドレス
3 s: 文字列中の位置
# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
文字列の文字数を返す。
# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
文字列のバイト数を返す。
# int onig_set_default_syntax(OnigSyntaxType* syntax)
デフォルトの正規表現パターン文法をセットする。
引数
1 syntax: 正規表現パターン文法
# void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from)
正規表現パターン文法をコピーする。
引数
1 to: 対象
2 from: 元
# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
正規表現パターン文法の要素を取得/設定する。
引数
1 syntax: 正規表現パターン文法
2 op, op2, behavior, options: 要素の値
# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
文字エンコーディングをコピーする。
引数
1 to: 対象
2 from: 元
# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what,
OnigCodePoint code)
メタ文字を指定したコードポイント値にセットする。
ONIG_SYN_OP_VARIABLE_META_CHARACTERSが正規表現パターン文法で有効に
なっていない場合には、エスケープ文字を除いて、ここで指定したメタ文字は
機能しない。(組込みの文法では有効にしていない。)
正常終了戻り値: ONIG_NORMAL
引数
1 syntax: 対象文法
2 what: メタ文字機能の指定
ONIG_META_CHAR_ESCAPE
ONIG_META_CHAR_ANYCHAR
ONIG_META_CHAR_ANYTIME
ONIG_META_CHAR_ZERO_OR_ONE_TIME
ONIG_META_CHAR_ONE_OR_MORE_TIME
ONIG_META_CHAR_ANYCHAR_ANYTIME
3 code: メタ文字のコードポイント または ONIG_INEFFECTIVE_META_CHAR.
# OnigCaseFoldType onig_get_default_case_fold_flag()
デフォルトのcase foldフラグを取得する。
# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
デフォルトのcase foldフラグをセットする。
引数
1 case_fold_flag: case foldフラグ
# unsigned int onig_get_match_stack_limit_size(void)
マッチスタックサイズの最大値を返す。
(デフォルト: 0 == 無制限)
# int onig_set_match_stack_limit_size(unsigned int size)
マッチスタックサイズの最大値を指定する。
(size = 0: 無制限)
正常終了戻り値: ONIG_NORMAL
# int onig_end(void)
ライブラリの使用を終了する。
正常終了戻り値: ONIG_NORMAL
onig_init()を再度呼び出しても、以前に作成した正規表現オブジェクト
を使用することはできない。
# const char* onig_version(void)
バージョン文字列を返す。(例 "5.0.3")
// END

47
src/Onigmo/doc/FAQ Normal file
View File

@ -0,0 +1,47 @@
FAQ 2011/09/18
1. Longest match
You can execute longest match by using ONIG_OPTION_FIND_LONGEST option
in onig_new().
2. Thread safe
To make the library thread safe, either (A) or (B) must be done.
(A) Onigmo Layer
Define the macro below in oniguruma/regint.h.
USE_MULTI_THREAD_SYSTEM
THREAD_ATOMIC_START
THREAD_ATOMIC_END
THREAD_PASS
THREAD_SYSTEM_INIT
THREAD_SYSTEM_END
(B) Application Layer
Multiple threads must not simultaneously create new regexp
objects, re-compile objects, or free objects, even if those
objects are different from each other.
3. CR + LF
DOS newline (CR (0x0d) + LF (0x0a) sequence)
Enable the following line in regenc.h, and use ONIG_OPTION_NEWLINE_CRLF
option in onig_new().
/* #define USE_CRNL_AS_LINE_TERMINATOR */
4. Mailing list
There is no mailing list about Onigmo/Oniguruma.
// END

131
src/Onigmo/doc/FAQ.ja Normal file
View File

@ -0,0 +1,131 @@
FAQ 2011/09/18
1. 最長マッチ
onig_new()の中で、ONIG_OPTION_FIND_LONGESTオプション
を使用すれば最長マッチになる。
2. スレッドセーフ
スレッドセーフにするには、以下の(A)と(B)のどちらかを行なえば
よい。
(A) Onigmo Layer
oniguruma/regint.hの中の以下のマクロを定義する。
USE_MULTI_THREAD_SYSTEM
THREAD_ATOMIC_START
THREAD_ATOMIC_END
THREAD_PASS
何らかの初期化/終了処理が必要であれば、以下のマクロに定義する。
THREAD_SYSTEM_INIT
THREAD_SYSTEM_END
(B) Application Layer
同時に複数のスレッドが、正規表現オブジェクトを作成する、
または解放する、ことを行なってはならない。
それらのオブジェクトが全く別のものであっても。
もう少し詳しい説明は、このドキュメントの中の
"スレッドセーフに関する補足"に書いておいた。
3. CR + LF
DOSの改行(CR(0x0d) + LF(0x0a)の連続)
regenc.hの中の、以下の部分を有効にし、onig_new()で
ONIG_OPTION_NEWLINE_CRLFオプションを使用する。
/* #define USE_CRNL_AS_LINE_TERMINATOR */
4. メーリングリスト
鬼雲/鬼車に関するメーリングリストは存在しない。
//END
スレッドセーフに関する補足
スレッドセーフにするには、個別のアプリケーションの中で行うか、
Onigurumaライブラリの中で行うか、どちらかを選ぶことができます。
(Onigurumaを使用する側で対処するか、Onigurumaに対処させるか
どちらか片方で行う必要があるということです。)
これらの方法について、以下(A)と(B)で説明します。
マルチスレッドAPIは、それぞれのプラットフォームによっても
異なりますので、以下の説明の中で具体的に何を呼ぶのかを
書くことは無理です。実際に使用されるマルチスレッドAPIで、
対応する機能のものを指定してください。
(A) Onigurumaの中で対応する場合
oniguruma/regint.hの中で以下のマクロを定義して再コンパイルしてください。
USE_MULTI_THREAD_SYSTEM
単に有効にすればよいです。
THREAD_ATOMIC_START
THREAD_ATOMIC_END
THREAD_ATOMIC_STARTからTHREAD_ATOMIC_ENDで囲まれた
プログラムのコード部分をあるスレッドが実行中に、他の
スレッドに実行権が移動しないことを保証するものに定義
してください。
(名前の通り、囲まれたコード部分をスレッドアトミックに
するという意味)
THREAD_PASS
これを実行したスレッドから、他のスレッドに実行権を委譲
するものに定義をしてください。(再スケジュールを呼び出す
という意味)
対応する機能が全くなければ、空定義にしてください。
(参考例)
Rubyの場合を例にすると、
Rubyは自分自身で独自のスレッド機能を実装しています。
その機能を使用すると、以下のように定義すればよいことに
なります。
#define USE_MULTI_THREAD_SYSTEM
#define THREAD_SYSTEM_INIT
#define THREAD_SYSTEM_END
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
Rubyの場合、タイマ割り込みを使用して、スレッドの切り替えを
行っています。DEFER_INTSは割り込みハンドラの実行を一時的に
止めるためのマクロです。ENABLE_INTSマクロで割り込みハンドラ
の実行を許可します。
これによって、THREAD_ATOMIC_STARTからTHREAD_ATOMIC_END
で囲まれた部分の実行中に、他のスレッドに実行権が移動しません。
(B) アプリケーションの中で対応する場合
以下を保証するように、スレッドの実行を制御してください。
同時に複数のスレッドが、正規表現オブジェクトを作成する、または解放する、ことを
行なってはならない。それらのオブジェクトが全く別のものであっても。
onig_new(), onig_new_deluxe(), onig_free()のどれかの呼び出しを、
複数のスレッドが同時に実行することを避けてください。同時でなければ別にかまいません。
これは何故必要なのかというと、正規表現オブジェクトを作成する
過程で、内部で共通に参照するテーブルがあります。
このテーブルに対してのデータ登録処理が複数のスレッドで衝突して
異常な状態にならないために必要です。
// END

522
src/Onigmo/doc/RE Normal file
View File

@ -0,0 +1,522 @@
Onigmo (Oniguruma-mod) Regular Expressions Version 5.13.0 2012/01/19
syntax: ONIG_SYNTAX_RUBY (default)
1. Syntax elements
\ escape (enable or disable meta character meaning)
| alternation
(...) group
[...] character class
2. Characters
\t horizontal tab (0x09)
\v vertical tab (0x0B)
\n newline (0x0A)
\r return (0x0D)
\b back space (0x08)
\f form feed (0x0C)
\a bell (0x07)
\e escape (0x1B)
\nnn octal char (encoded byte value)
\xHH hexadecimal char (encoded byte value)
\x{7HHHHHHH} wide hexadecimal char (character code point value)
\cx control char (character code point value)
\C-x control char (character code point value)
\M-x meta (x|0x80) (character code point value)
\M-\C-x meta control char (character code point value)
(* \b is effective in character class [...] only)
3. Character types
. any character (except newline)
\w word character
Not Unicode:
alphanumeric and "_".
Unicode:
General_Category -- (Letter|Mark|Number|Connector_Punctuation)
Whether non-ASCII characters are included depends on the
ONIG_OPTION_ASCII_RANGE option.
\W non word char
\s whitespace char
Not Unicode:
\t, \n, \v, \f, \r, \x20
Unicode:
0009, 000A, 000B, 000C, 000D, 0085(NEL),
General_Category -- Line_Separator
-- Paragraph_Separator
-- Space_Separator
Whether non-ASCII characters are included depends on the
ONIG_OPTION_ASCII_RANGE option.
\S non whitespace char
\d decimal digit char
Unicode: General_Category -- Decimal_Number
Whether non-ASCII characters are included depends on the
ONIG_OPTION_ASCII_RANGE option.
\D non decimal digit char
\h hexadecimal digit char [0-9a-fA-F]
\H non hexadecimal digit char
Character Property
* \p{property-name}
* \p{^property-name} (negative)
* \P{property-name} (negative)
property-name:
+ works on all encodings
Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
Print, Punct, Space, Upper, XDigit, Word, ASCII,
+ works on EUC_JP, Shift_JIS, CP932
Hiragana, Katakana, Han, Latin, Greek, Cyrillic
+ works on UTF8, UTF16, UTF32
see UnicodeProps.txt
\R Linebreak
Unicode:
(?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}])
Not Unicode:
(?>\x0D\x0A|[\x0A-\x0D])
\X eXtended grapheme cluster
Unicode:
(?>\P{M}\p{M}*)
Not Unicode:
(?m:.)
4. Quantifier
greedy
? 1 or 0 times
* 0 or more times
+ 1 or more times
{n,m} at least n but not more than m times
{n,} at least n times
{,n} at least 0 but not more than n times ({0,n})
{n} n times
reluctant
?? 1 or 0 times
*? 0 or more times
+? 1 or more times
{n,m}? at least n but not more than m times
{n,}? at least n times
{,n}? at least 0 but not more than n times (== {0,n}?)
possessive (greedy and does not backtrack after repeated)
?+ 1 or 0 times
*+ 0 or more times
++ 1 or more times
({n,m}+, {n,}+, {n}+ are possessive op. in ONIG_SYNTAX_JAVA and
ONIG_SYNTAX_PERL only)
ex. /a*+/ === /(?>a*)/
5. Anchors
^ beginning of the line
$ end of the line
\b word boundary
\B not word boundary
\A beginning of string
\Z end of string, or before newline at the end
\z end of string
\G matching start position
6. Character class
^... negative class (lowest precedence operator)
x-y range from x to y
[...] set (character class in character class)
..&&.. intersection (second-lowest precedence, next after ^)
ex. [a-w&&[^c-g]z] ==> ([a-w] AND ([^c-g] OR z)) ==> [abh-w]
* If you want to use '[', '-', ']' as a normal character
in a character class, you should escape these characters by '\'.
POSIX bracket ([:xxxxx:], negate [:^xxxxx:])
Not Unicode Case:
alnum alphabet or digit char
alpha alphabet
ascii code value: [0 - 127]
blank \t, \x20
cntrl
digit 0-9
graph \x21-\x7E and all of multibyte encoded characters
lower
print \x20-\x7E and all of multibyte encoded characters
punct
space \t, \n, \v, \f, \r, \x20
upper
xdigit 0-9, a-f, A-F
word alphanumeric, "_" and multibyte characters
Unicode Case:
alnum Letter | Mark | Decimal_Number
alpha Letter | Mark
ascii 0000 - 007F
blank Space_Separator | 0009
cntrl Control | Format | Unassigned | Private_Use | Surrogate
digit Decimal_Number
graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
lower Lowercase_Letter
print [[:graph:]] | Space_Separator
punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
Final_Punctuation | Initial_Punctuation | Other_Punctuation |
Open_Punctuation
space Space_Separator | Line_Separator | Paragraph_Separator |
0009 | 000A | 000B | 000C | 000D | 0085
upper Uppercase_Letter
xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
(0-9, a-f, A-F)
word Letter | Mark | Decimal_Number | Connector_Punctuation
It depends on ONIG_OPTION_ASCII_RANGE option and
ONIG_OPTION_POSIX_BRACKET_ALL_RANGE option that POSIX brackets
match non-ASCII char or not.
7. Extended groups
(?#...) comment
(?imxdau-imx) option on/off
i: ignore case
m: multi-line (dot(.) match newline)
x: extended form
character set option (character range option)
d: Default (compatible with Ruby 1.9.3)
\w, \d and \s don't match non-ASCII characters.
\b, \B and POSIX brackets use each encoding's
rules.
a: ASCII
ONIG_OPTION_ASCII_RANGE option is turned on.
\w, \d, \s and POSIX brackets don't match
non-ASCII characters.
\b and \B use the ASCII rules.
u: Unicode
ONIG_OPTION_ASCII_RANGE option is turned off.
\w (\W), \d (\D), \s (\S), \b (\B) and POSIX
brackets use each encoding's rules.
(?imxdau-imx:subexp)
option on/off for subexp
(?:subexp) not captured group
(subexp) captured group
(?=subexp) look-ahead
(?!subexp) negative look-ahead
(?<=subexp) look-behind
(?<!subexp) negative look-behind
Subexp of look-behind must be fixed character length.
But different character length is allowed in top level
alternatives only.
ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed.
In negative-look-behind, captured group isn't allowed,
but shy group(?:) is allowed.
\K keep
Another expression of look-behind. Keep the stuff left
of the \K, don't include it in the result.
(?>subexp) atomic group
don't backtrack in subexp.
(?<name>subexp), (?'name'subexp)
define named group
(All characters of the name must be a word character.)
Not only a name but a number is assigned like a captured
group.
Assigning the same name as two or more subexps is allowed.
In this case, a subexp call can not be performed although
the back reference is possible.
(ONIG_SYNTAX_PERL: a subexp call is allowed in this case.)
(?(cond)yes-subexp), (?(cond)yes-subexp|no-subexp)
conditional expression
Matches yes-subexp if (cond) yields a true value, matches
no-subexp otherwise.
Following (cond) can be used:
(n) (n >= 1)
Checks if the numbered capturing group has matched
something.
(<name>), ('name')
Checks if a group with the given name has matched
something.
8. Back reference
\n back reference by group number (n >= 1)
\k<n> back reference by group number (n >= 1)
\k'n' back reference by group number (n >= 1)
\k<-n> back reference by relative group number (n >= 1)
\k'-n' back reference by relative group number (n >= 1)
\k<name> back reference by group name
\k'name' back reference by group name
In the back reference by the multiplex definition name,
a subexp with a large number is referred to preferentially.
(When not matched, a group of the small number is referred to.)
* Back reference by group number is forbidden if named group is defined
in the pattern and ONIG_OPTION_CAPTURE_GROUP is not set.
* ONIG_SYNTAX_PERL: \g{n}, \g{-n} and \g{name} can also be used.
back reference with nest level
level: 0, 1, 2, ...
\k<n+level> (n >= 1)
\k<n-level> (n >= 1)
\k'n+level' (n >= 1)
\k'n-level' (n >= 1)
\k<-n+level> (n >= 1)
\k<-n-level> (n >= 1)
\k'-n+level' (n >= 1)
\k'-n-level' (n >= 1)
\k<name+level>
\k<name-level>
\k'name+level'
\k'name-level'
Specify a subexp call nest level relative to the back reference position, and refer to the value captured at that level.
ex 1.
/\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
ex 2.
r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
(?<element> \g<stag> \g<content>* \g<etag> ){0}
(?<stag> < \g<name> \s* > ){0}
(?<name> [a-zA-Z_:]+ ){0}
(?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
(?<etag> </ \k<name+1> >){0}
\g<element>
__REGEXP__
p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
9. Subexp call ("Tanaka Akira special")
\g<name> call by group name
\g'name' call by group name
\g<n> call by group number (n >= 1)
\g'n' call by group number (n >= 1)
\g<0> call the whole pattern recursively
\g'0' call the whole pattern recursively
\g<-n> call by relative group number (n >= 1)
\g'-n' call by relative group number (n >= 1)
\g<+n> call by relative group number (n >= 1)
\g'+n' call by relative group number (n >= 1)
* left-most recursive call is not allowed.
ex. (?<name>a|\g<name>b) => error
(?<name>a|b\g<name>c) => OK
* Call by group number is forbidden if named group is defined in the pattern
and ONIG_OPTION_CAPTURE_GROUP is not set.
* If the option status of called group is different from calling position
then the group's option is effective.
ex. (?-i:\g<name>)(?i:(?<name>a)){0} match to "A"
* ONIG_SYNTAX_PERL: use (?&name), (?n), (?-n), (?+n), (?R) or (?0) instead.
10. Captured group
Behavior of the no-named group (...) changes with the following conditions.
(But named group is not changed.)
case 1. /.../ (named group is not used, no option)
(...) is treated as a captured group.
case 2. /.../g (named group is not used, 'g' option)
(...) is treated as a no-captured group (?:...).
case 3. /..(?<name>..)../ (named group is used, no option)
(...) is treated as a no-captured group (?:...).
numbered-backref/call is not allowed.
case 4. /..(?<name>..)../G (named group is used, 'G' option)
(...) is treated as a captured group.
numbered-backref/call is allowed.
where
g: ONIG_OPTION_DONT_CAPTURE_GROUP
G: ONIG_OPTION_CAPTURE_GROUP
('g' and 'G' options were discussed on the ruby-dev ML)
-----------------------------
A-1. Syntax depend options
+ ONIG_SYNTAX_RUBY
(?m): dot(.) match newline
+ ONIG_SYNTAX_PERL, ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PYTHON
(?s): dot(.) match newline
(?m): ^ match after newline, $ match before newline
+ ONIG_SYNTAX_PERL
(?d), (?l): same as (?u)
A-2. Original extensions
+ hexadecimal digit char type \h, \H
+ named group (?<name>...), (?'name'...)
+ named backref \k<name>
+ subexp call \g<name>, \g<group-num>
A-3. Missing features compared with Perl 5.14.0
+ \N{name}, \N{U+xxxx}, \N
+ \l,\u,\L,\U, \C
+ \v, \V, \h, \H, \o{xxx}
+ (?{code})
+ (??{code})
+ (?|...)
+ (*VERB:ARG)
* \Q...\E
This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8
+ add character property (\p{property}, \P{property})
+ add hexadecimal digit char type (\h, \H)
+ add look-behind
(?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
+ add possessive quantifier. ?+, *+, ++
+ add operations in character class. [], &&
('[' must be escaped to be used as a normal char in a character class.)
+ add named group and subexp call.
+ octal or hexadecimal number sequence can be treated as
a multibyte code char in character class if multibyte encoding
is specified.
(ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
+ allow the range of single byte char and multibyte char in character
class.
ex. /[a-<<any EUC-JP character>>]/ in EUC-JP encoding.
+ effect range of isolated option is to next ')'.
ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b).
+ isolated option is not transparent to previous pattern.
ex. a(?i)* is a syntax error pattern.
+ an incomplete left brace is allowed as an ordinary string.
ex. /{/, /({)/, /a{2,3/ etc...
+ negative POSIX bracket [:^xxxx:] is supported.
+ POSIX bracket [:ascii:] is added.
+ repeat of look-ahead is not allowed.
ex. /(?=a)*/, /(?!b){5}/
+ Ignore case option is effective to numbered character.
ex. /\x61/i =~ "A"
+ In a range quantifier, the minimum count may be omitted.
/a{,n}/ == /a{0,n}/
Omitting both the minimum and the maximum at the same time
is not allowed. (/a{,}/)
+ /a{n}?/ is not a non-greedy operator.
/a{n}?/ == /(?:a{n})?/
+ an invalid back reference is checked and causes an error.
/\1/, /(a)\2/
+ Zero-length match in infinite repeat stops the repeat,
then changes of the capture group status are checked as stop condition.
/(?:()|())*\1\2/ =~ ""
/(?:\1a|())*/ =~ "a"
A-5. Disabled functions by default syntax
+ capture history
(?@...) and (?@<name>...)
ex. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
see sample/listcap.c file.
A-6. Problems
+ Invalid encoding byte sequence is not checked.
ex. UTF-8
* Invalid first byte is treated as a character.
/./u =~ "\xa3"
* Incomplete byte sequence is not checked.
/\w+/u =~ "a\xf3\x8ec"
// END

532
src/Onigmo/doc/RE.ja Normal file
View File

@ -0,0 +1,532 @@
鬼雲 (鬼車改) 正規表現 Version 5.13.0 2012/01/19
使用文法: ONIG_SYNTAX_RUBY (既定値)
1. 基本要素
\ 退避修飾 (エスケープ) 正規表現記号の有効/無効の制御
| 選択子
(...) 式集合 (グループ)
[...] 文字集合 (文字クラス)
2. 文字
\t 水平タブ (0x09)
\v 垂直タブ (0x0B)
\n 改行 (0x0A)
\r 復帰 (0x0D)
\b 後退空白 (0x08)
\f 改頁 (0x0C)
\a 鐘 (0x07)
\e 退避修飾 (0x1B)
\nnn 八進数表現 符号化バイト値(の一部)
\xHH 十六進数表現 符号化バイト値(の一部)
\x{7HHHHHHH} 拡張十六進数表現 コードポイント値
\cx 制御文字表現 コードポイント値
\C-x 制御文字表現 コードポイント値
\M-x 超 (x|0x80) コードポイント値
\M-\C-x 超 + 制御文字表現 コードポイント値
※ \bは、文字集合内でのみ有効
3. 文字種
. 任意文字 (改行を除く)
\w 単語構成文字
Unicode以外の場合:
英数字 および "_"。
Unicodeの場合:
General_Category -- (Letter|Mark|Number|Connector_Punctuation)
ASCII外の文字を含むかどうかは ONIG_OPTION_ASCII_RANGE オプションに
依存する。
\W 非単語構成文字
\s 空白文字
Unicode以外の場合:
\t, \n, \v, \f, \r, \x20
Unicodeの場合:
0009, 000A, 000B, 000C, 000D, 0085(NEL),
General_Category -- Line_Separator
-- Paragraph_Separator
-- Space_Separator
ASCII外の文字を含むかどうかは ONIG_OPTION_ASCII_RANGE オプションに
依存する。
\S 非空白文字
\d 10進数字
Unicodeの場合: General_Category -- Decimal_Number
ASCII外の文字を含むかどうかは ONIG_OPTION_ASCII_RANGE オプションに
依存する。
\D 非10進数字
\h 16進数字 [0-9a-fA-F]
\H 非16進数字
Character Property
* \p{property-name}
* \p{^property-name} (negative)
* \P{property-name} (negative)
property-name:
+ 全てのエンコーディングで有効
Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
Print, Punct, Space, Upper, XDigit, Word, ASCII,
+ EUC-JP, Shift_JIS, CP932で有効
Hiragana, Katakana, Han, Latin, Greek, Cyrillic
+ UTF8, UTF16, UTF32で有効
UnicodeProps.txt 参照
\R 改行文字 (Linebreak)
Unicodeの場合:
(?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}])
Unicode以外の場合:
(?>\x0D\x0A|[\x0A-\x0D])
\X eXtended grapheme cluster
Unicodeの場合:
(?>\P{M}\p{M}*)
Unicode以外の場合:
(?m:.)
4. 量指定子
欲張り
? 一回または零回
* 零回以上
+ 一回以上
{n,m} n回以上m回以下
{n,} n回以上
{,n} 零回以上n回以下 ({0,n})
{n} n回
無欲
?? 一回または零回
*? 零回以上
+? 一回以上
{n,m}? n回以上m回以下
{n,}? n回以上
{,n}? 零回以上n回以下 (== {0,n}?)
強欲 (欲張りで、繰り返しに成功した後は回数を減らすような後退再試行をしない)
?+ 一回または零回
*+ 零回以上
++ 一回以上
({n,m}+, {n,}+, {n}+ は、ONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ強欲な
指定子)
例. /a*+/ === /(?>a*)/
5. 錨
^ 行頭
$ 行末
\b 単語境界
\B 非単語境界
\A 文字列先頭
\Z 文字列末尾、または文字列末尾の改行の直前
\z 文字列末尾
\G 照合開始位置
6. 文字集合
^... 否定 (最低優先度演算子)
x-y 範囲 (xからyまで)
[...] 集合 (文字集合内文字集合)
..&&.. 積演算 (^の次に優先度が低い演算子)
例. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w]
※ '[', '-', ']'を、文字集合内で通常文字の意味で使用したい場合には、
これらの文字を'\'で退避修飾しなければならない。
POSIXブラケット ([:xxxxx:], 否定 [:^xxxxx:])
Unicode以外の場合:
alnum 英数字
alpha 英字
ascii 0 - 127
blank \t, \x20
cntrl
digit 0-9
graph \x21-\x7E および 多バイト文字全部を含む
lower
print \x20-\x7E および 多バイト文字全部を含む
punct
space \t, \n, \v, \f, \r, \x20
upper
xdigit 0-9, a-f, A-F
word 英数字, "_" および 多バイト文字
Unicodeの場合:
alnum Letter | Mark | Decimal_Number
alpha Letter | Mark
ascii 0000 - 007F
blank Space_Separator | 0009
cntrl Control | Format | Unassigned | Private_Use | Surrogate
digit Decimal_Number
graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
lower Lowercase_Letter
print [[:graph:]] | Space_Separator
punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
Final_Punctuation | Initial_Punctuation | Other_Punctuation |
Open_Punctuation
space Space_Separator | Line_Separator | Paragraph_Separator |
0009 | 000A | 000B | 000C | 000D | 0085
upper Uppercase_Letter
xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
(0-9, a-f, A-F)
word Letter | Mark | Decimal_Number | Connector_Punctuation
POSIXブラケットがASCII外の文字にマッチするかどうかは
ONIG_OPTION_ASCII_RANGEオプションとONIG_OPTION_POSIX_BRACKET_ALL_RANGE
オプションに依存する。
7. 拡張式集合
(?#...) 注釈
(?imxdau-imx) 孤立オプション
i: 大文字小文字照合
m: 複数行
x: 拡張形式
文字集合オプション (文字範囲オプション)
d: デフォルト (Ruby 1.9.3 互換)
\w, \d, \s は、非ASCII文字にマッチしない。
\b, \B, POSIXブラケットは、各エンコーディングの
ルールに従う。
a: ASCII
ONIG_OPTION_ASCII_RANGEオプションがオンになる。
\w, \d, \s, POSIXブラケットは、非ASCII文字に
マッチしない。
\b, \B は、ASCIIのルールに従う。
u: Unicode
ONIG_OPTION_ASCII_RANGEオプションがオフになる。
\w (\W), \d (\D), \s (\S), \b (\B), POSIXブラケット
は、各エンコーディングのルールに従う。
(?imxdau-imx:式) 式オプション
(式) 捕獲式集合
(?:式) 非捕獲式集合
(?=式) 先読み
(?!式) 否定先読み
(?<=式) 戻り読み
(?<!式) 否定戻り読み
戻り読みの式は固定文字長でなければならない。
しかし、最上位の選択子だけは異なった文字長が許される。
例. (?<=a|bc) は許可. (?<=aaa(?:b|cd)) は不許可
否定戻り読みでは、捕獲式集合は許されないが、
非捕獲式集合は許される。
\K 保持
戻り読みの別表記。\K の左側を保持し、検索結果に含まない。
(?>式) 原子的式集合
式全体を通過したとき、式の中での後退再試行を行なわない
(?<name>式), (?'name'式)
名前付き捕獲式集合
式集合に名前を割り当てる(定義する)。
(名前は単語構成文字でなければならない。)
名前だけでなく、捕獲式集合と同様に番号も割り当てられる。
番号指定が禁止されていない状態 (10. 捕獲式集合 を参照)
のときは、名前を使わないで番号でも参照できる。
複数の式集合に同じ名前を与えることは許されている。
この場合には、この名前を使用した後方参照は可能であるが、
部分式呼出しはできない。
(ONIG_SYNTAX_PERLでは部分式呼出しも可能。)
(?(条件)真の式), (?(条件)真の式|偽の式)
条件式
(条件)が真であれば真の式がマッチし、偽であれば偽の式が
マッチする。
(条件)には以下のものが使用できる。
(n) (n >= 1)
番号指定の後方参照が何かにマッチしていれば真、
マッチしていなければ偽
(<name>), ('name')
名前指定の後方参照が何かにマッチしていれば真、
マッチしていなければ偽
8. 後方参照
\n 番号指定参照 (n >= 1)
\k<n> 番号指定参照 (n >= 1)
\k'n' 番号指定参照 (n >= 1)
\k<-n> 相対番号指定参照 (n >= 1)
\k'-n' 相対番号指定参照 (n >= 1)
\k<name> 名前指定参照
\k'name' 名前指定参照
名前指定参照で、その名前が複数の式集合で多重定義されている場合には、
番号の大きい式集合から優先的に参照される。
(マッチしないときには番号の小さい式集合が参照される)
※ 番号指定参照は、名前付き捕獲式集合が定義され、
かつ ONIG_OPTION_CAPTURE_GROUPが指定されていない場合には、
禁止される。(10. 捕獲式集合 を参照)
※ ONIG_SYNTAX_PERLでは、\g{n}, \g{-n}, \g{name} も使用可能。
ネストレベル付き後方参照
level: 0, 1, 2, ...
\k<n+level> (n >= 1)
\k<n-level> (n >= 1)
\k'n+level' (n >= 1)
\k'n-level' (n >= 1)
\k<-n+level> (n >= 1)
\k<-n-level> (n >= 1)
\k'-n+level' (n >= 1)
\k'-n-level' (n >= 1)
\k<name+level>
\k<name-level>
\k'name+level'
\k'name-level'
後方参照の位置から相対的な部分式呼出しネストレベルを指定して、そのレベルでの
捕獲値を参照する。
例-1.
/\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
例-2.
r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
(?<element> \g<stag> \g<content>* \g<etag> ){0}
(?<stag> < \g<name> \s* > ){0}
(?<name> [a-zA-Z_:]+ ){0}
(?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
(?<etag> </ \k<name+1> >){0}
\g<element>
__REGEXP__
p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
9. 部分式呼出し ("田中哲スペシャル")
\g<name> 名前指定呼出し
\g'name' 名前指定呼出し
\g<n> 番号指定呼出し (n >= 1)
\g'n' 番号指定呼出し (n >= 1)
\g<0> パターン全体の再帰呼び出し
\g'0' パターン全体の再帰呼び出し
\g<-n> 相対番号指定呼出し (n >= 1)
\g'-n' 相対番号指定呼出し (n >= 1)
\g<+n> 相対番号指定呼出し (n >= 1)
\g'+n' 相対番号指定呼出し (n >= 1)
※ 最左位置での再帰呼出しは禁止される。
例. (?<name>a|\g<name>b) => error
(?<name>a|b\g<name>c) => OK
※ 番号指定呼出しは、名前付き捕獲式集合が定義され、
かつ ONIG_OPTION_CAPTURE_GROUPが指定されていない場合には、
禁止される。 (10. 捕獲式集合 を参照)
※ 呼び出された式集合のオプション状態が呼出し側のオプション状態と異なっている
とき、呼び出された側のオプション状態が有効である。
例. (?-i:\g<name>)(?i:(?<name>a)){0} は "A" に照合成功する。
※ ONIG_SYNTAX_PERLでは代わりに (?&name), (?n), (?-n), (?+n), (?R), (?0) を
使用する。
10. 捕獲式集合
捕獲式集合(...)は、以下の条件に応じて振舞が変化する。
(名前付き捕獲式集合は変化しない)
case 1. /.../ (名前付き捕獲式集合は不使用、オプションなし)
(...) は、捕獲式集合として扱われる。
case 2. /.../g (名前付き捕獲式集合は不使用、オプション 'g'を指定)
(...) は、非捕獲式集合として扱われる。
case 3. /..(?<name>..)../ (名前付き捕獲式集合は使用、オプションなし)
(...) は、非捕獲式集合として扱われる。
番号指定参照/呼び出しは不許可。
case 4. /..(?<name>..)../G (名前付き捕獲式集合は使用、オプション 'G'を指定)
(...) は、捕獲式集合として扱われる。
番号指定参照/呼び出しは許可。
但し
g: ONIG_OPTION_DONT_CAPTURE_GROUP
G: ONIG_OPTION_CAPTURE_GROUP
('g'と'G'オプションは、ruby-dev MLで議論された。)
これらの振舞の意味は、
名前付き捕獲と名前無し捕獲を同時に使用する必然性のある場面は少ないであろう
という理由から考えられたものである。
-----------------------------
補記 1. 文法依存オプション
+ ONIG_SYNTAX_RUBY
(?m): 終止符記号(.)は改行と照合成功
+ ONIG_SYNTAX_PERL、ONIG_SYNTAX_JAVA、ONIG_SYNTAX_PYTHON
(?s): 終止符記号(.)は改行と照合成功
(?m): ^ は改行の直後に照合する、$ は改行の直前に照合する
+ ONIG_SYNTAX_PERL
(?d), (?l): (?u)と同じ
補記 2. 独自拡張機能
+ 16進数数字、非16進数字 \h, \H
+ 名前付き捕獲式集合 (?<name>...), (?'name'...)
+ 名前指定後方参照 \k<name>
+ 部分式呼出し \g<name>, \g<group-num>
補記 3. Perl 5.14.0と比較して存在しない機能
+ \N{name}, \N{U+xxxx}, \N
+ \l,\u,\L,\U, \C
+ \v, \V, \h, \H, \o{xxx}
+ (?{code})
+ (??{code})
+ (?|...)
+ (*VERB:ARG)
* \Q...\E
但しONIG_SYNTAX_PERLとONIG_SYNTAX_JAVAでは有効
補記 4. Ruby 1.8 の日本語化 GNU regex(version 0.12)との違い
+ 文字Property機能追加 (\p{property}, \P{Property})
+ 16進数字タイプ追加 (\h, \H)
+ 戻り読み機能を追加
+ 強欲な繰り返し指定子を追加 (?+, *+, ++)
+ 文字集合の中の演算子を追加 ([...], &&)
('[' は、文字集合の中で通常の文字として使用するときには
退避修飾しなければならない)
+ 名前付き捕獲式集合と、部分式呼出し機能追加
+ 多バイト文字コードが指定されているとき、
文字集合の中で八進数または十六進数表現の連続は、多バイト符号で表現された
一個の文字と解釈される
(例. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
+ 文字集合の中で、一バイト文字と多バイト文字の範囲指定は許される。
ex. /[a-あ]/
+ 孤立オプションの有効範囲は、その孤立オプションを含んでいる式集合の
終わりまでである
例. (?:(?i)a|b) は (?:(?i:a|b)) と解釈される、(?:(?i:a)|b)ではない
+ 孤立オプションはその前の式に対して透過的ではない
例. /a(?i)*/ は文法エラーとなる
+ 不完全な繰り返し範囲指定子は通常の文字列として許可される
例. /{/, /({)/, /a{2,3/
+ 否定的POSIXブラケット [:^xxxx:] を追加
+ POSIXブラケット [:ascii:] を追加
+ 先読みの繰り返しは不許可
例. /(?=a)*/, /(?!b){5}/
+ 数値で指定された文字に対しても、大文字小文字照合オプションは有効
例. /\x61/i =~ "A"
+ 繰り返し回数指定で、最低回数の省略(0回)ができる
/a{,n}/ == /a{0,n}/
最低回数と最大回数の同時省略は許されない。(/a{,}/)
+ /a{n}?/は無欲な演算子ではない。
/a{n}?/ == /(?:a{n})?/
+ 無効な後方参照をチェックしてエラーにする。
/\1/, /(a)\2/
+ 無限繰り返しの中で、長さ零での照合成功は繰り返しを中断させるが、
このとき、中断すべきかどうかの判定として、捕獲式集合の捕獲状態の
変化まで考慮している
/(?:()|())*\1\2/ =~ ""
/(?:\1a|())*/ =~ "a"
補記 5. 実装されているが、既定値では有効にしていない機能
+ 捕獲履歴参照
(?@...) と (?@<name>...)
例. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
使用方法は、sample/listcap.cを参照
有効にしていない理由は、どの程度役に立つかはっきりしないため。
補記 6. 問題点
+ エンコーディングバイト値が適正な値かどうかのチェックは行なっていない。
例: UTF-8
* 先頭バイトとして不正なバイトを一文字とみなす
/./u =~ "\xa3"
* 不完全なバイトシーケンスのチェックをしない
/\w+/u =~ "a\xf3\x8ec"
これを調べることは可能ではあるが、遅くなるので行なわない。
文字列として、そのようなバイト列を指定した場合の動作は保証しない。
終り

View File

@ -0,0 +1,640 @@
Onigmo (Oniguruma-mod) Unicode Properties Version 5.13.1 2012/02/01
* POSIX brackets
Alpha
Blank
Cntrl
Digit
Graph
Lower
Print
Punct
Space
Upper
XDigit
Word
Alnum
ASCII
* Special
Any
Assigned
* Major and General Categories
C
Cc
Cf
Cn
Co
Cs
L
LC
Ll
Lm
Lo
Lt
Lu
M
Mc
Me
Mn
N
Nd
Nl
No
P
Pc
Pd
Pe
Pf
Pi
Po
Ps
S
Sc
Sk
Sm
So
Z
Zl
Zp
Zs
* Scripts
Arabic
Armenian
Avestan
Balinese
Bamum
Batak
Bengali
Bopomofo
Brahmi
Braille
Buginese
Buhid
Canadian_Aboriginal
Carian
Chakma
Cham
Cherokee
Common
Coptic
Cuneiform
Cypriot
Cyrillic
Deseret
Devanagari
Egyptian_Hieroglyphs
Ethiopic
Georgian
Glagolitic
Gothic
Greek
Gujarati
Gurmukhi
Han
Hangul
Hanunoo
Hebrew
Hiragana
Imperial_Aramaic
Inherited
Inscriptional_Pahlavi
Inscriptional_Parthian
Javanese
Kaithi
Kannada
Katakana
Kayah_Li
Kharoshthi
Khmer
Lao
Latin
Lepcha
Limbu
Linear_B
Lisu
Lycian
Lydian
Malayalam
Mandaic
Meetei_Mayek
Meroitic_Cursive
Meroitic_Hieroglyphs
Miao
Mongolian
Myanmar
New_Tai_Lue
Nko
Ogham
Ol_Chiki
Old_Italic
Old_Persian
Old_South_Arabian
Old_Turkic
Oriya
Osmanya
Phags_Pa
Phoenician
Rejang
Runic
Samaritan
Saurashtra
Sharada
Shavian
Sinhala
Sora_Sompeng
Sundanese
Syloti_Nagri
Syriac
Tagalog
Tagbanwa
Tai_Le
Tai_Tham
Tai_Viet
Takri
Tamil
Telugu
Thaana
Thai
Tibetan
Tifinagh
Ugaritic
Unknown
Vai
Yi
* DerivedCoreProperties
Alphabetic
Case_Ignorable
Cased
Changes_When_Casefolded
Changes_When_Casemapped
Changes_When_Lowercased
Changes_When_Titlecased
Changes_When_Uppercased
Default_Ignorable_Code_Point
Grapheme_Base
Grapheme_Extend
Grapheme_Link
ID_Continue
ID_Start
Lowercase
Math
Uppercase
XID_Continue
XID_Start
* PropList
ASCII_Hex_Digit
Bidi_Control
Dash
Deprecated
Diacritic
Extender
Hex_Digit
Hyphen
IDS_Binary_Operator
IDS_Trinary_Operator
Ideographic
Join_Control
Logical_Order_Exception
Noncharacter_Code_Point
Other_Alphabetic
Other_Default_Ignorable_Code_Point
Other_Grapheme_Extend
Other_ID_Continue
Other_ID_Start
Other_Lowercase
Other_Math
Other_Uppercase
Pattern_Syntax
Pattern_White_Space
Quotation_Mark
Radical
STerm
Soft_Dotted
Terminal_Punctuation
Unified_Ideograph
Variation_Selector
White_Space
* PropertyAliases
AHex
Bidi_C
CI
CWCF
CWCM
CWL
CWT
CWU
Dep
DI
Dia
Ext
Gr_Base
Gr_Ext
Gr_Link
Hex
IDC
Ideo
IDS
IDSB
IDST
Join_C
LOE
NChar
OAlpha
ODI
OGr_Ext
OIDC
OIDS
OLower
OMath
OUpper
Pat_Syn
Pat_WS
QMark
SD
Term
UIdeo
VS
WSpace
XIDC
XIDS
* PropertyValueAliases (General_Category)
Other
Control
Format
Unassigned
Private_Use
Surrogate
Letter
Cased_Letter
Lowercase_Letter
Modifier_Letter
Other_Letter
Titlecase_Letter
Uppercase_Letter
Mark
Spacing_Mark
Enclosing_Mark
Nonspacing_Mark
Number
Decimal_Number
Letter_Number
Other_Number
Punctuation
Connector_Punctuation
Dash_Punctuation
Close_Punctuation
Final_Punctuation
Initial_Punctuation
Other_Punctuation
Open_Punctuation
Symbol
Currency_Symbol
Modifier_Symbol
Math_Symbol
Other_Symbol
Separator
Line_Separator
Paragraph_Separator
Space_Separator
* PropertyValueAliases (Script)
Arab
Armi
Armn
Avst
Bali
Bamu
Batk
Beng
Bopo
Brah
Brai
Bugi
Buhd
Cans
Cari
Cher
Copt
Qaac
Cprt
Cyrl
Deva
Dsrt
Egyp
Ethi
Geor
Glag
Goth
Grek
Gujr
Guru
Hang
Hani
Hano
Hebr
Hira
Ital
Java
Kali
Kana
Khar
Khmr
Knda
Kthi
Lana
Laoo
Latn
Lepc
Limb
Linb
Lyci
Lydi
Mand
Mlym
Mong
Mtei
Mymr
Nkoo
Ogam
Olck
Orkh
Orya
Osma
Phag
Phli
Phnx
Prti
Rjng
Runr
Samr
Sarb
Saur
Shaw
Sinh
Sund
Sylo
Syrc
Tagb
Tale
Talu
Taml
Tavt
Telu
Tfng
Tglg
Thaa
Tibt
Ugar
Vaii
Xpeo
Xsux
Yiii
Zinh
Qaai
Zyyy
Zzzz
* DerivedAges
Age=1.1
Age=2.0
Age=2.1
Age=3.0
Age=3.1
Age=3.2
Age=4.0
Age=4.1
Age=5.0
Age=5.1
Age=5.2
Age=6.0
Age=6.1
* Blocks
In_Basic_Latin
In_Latin_1_Supplement
In_Latin_Extended_A
In_Latin_Extended_B
In_IPA_Extensions
In_Spacing_Modifier_Letters
In_Combining_Diacritical_Marks
In_Greek_and_Coptic
In_Cyrillic
In_Cyrillic_Supplement
In_Armenian
In_Hebrew
In_Arabic
In_Syriac
In_Arabic_Supplement
In_Thaana
In_NKo
In_Samaritan
In_Mandaic
In_Arabic_Extended_A
In_Devanagari
In_Bengali
In_Gurmukhi
In_Gujarati
In_Oriya
In_Tamil
In_Telugu
In_Kannada
In_Malayalam
In_Sinhala
In_Thai
In_Lao
In_Tibetan
In_Myanmar
In_Georgian
In_Hangul_Jamo
In_Ethiopic
In_Ethiopic_Supplement
In_Cherokee
In_Unified_Canadian_Aboriginal_Syllabics
In_Ogham
In_Runic
In_Tagalog
In_Hanunoo
In_Buhid
In_Tagbanwa
In_Khmer
In_Mongolian
In_Unified_Canadian_Aboriginal_Syllabics_Extended
In_Limbu
In_Tai_Le
In_New_Tai_Lue
In_Khmer_Symbols
In_Buginese
In_Tai_Tham
In_Balinese
In_Sundanese
In_Batak
In_Lepcha
In_Ol_Chiki
In_Sundanese_Supplement
In_Vedic_Extensions
In_Phonetic_Extensions
In_Phonetic_Extensions_Supplement
In_Combining_Diacritical_Marks_Supplement
In_Latin_Extended_Additional
In_Greek_Extended
In_General_Punctuation
In_Superscripts_and_Subscripts
In_Currency_Symbols
In_Combining_Diacritical_Marks_for_Symbols
In_Letterlike_Symbols
In_Number_Forms
In_Arrows
In_Mathematical_Operators
In_Miscellaneous_Technical
In_Control_Pictures
In_Optical_Character_Recognition
In_Enclosed_Alphanumerics
In_Box_Drawing
In_Block_Elements
In_Geometric_Shapes
In_Miscellaneous_Symbols
In_Dingbats
In_Miscellaneous_Mathematical_Symbols_A
In_Supplemental_Arrows_A
In_Braille_Patterns
In_Supplemental_Arrows_B
In_Miscellaneous_Mathematical_Symbols_B
In_Supplemental_Mathematical_Operators
In_Miscellaneous_Symbols_and_Arrows
In_Glagolitic
In_Latin_Extended_C
In_Coptic
In_Georgian_Supplement
In_Tifinagh
In_Ethiopic_Extended
In_Cyrillic_Extended_A
In_Supplemental_Punctuation
In_CJK_Radicals_Supplement
In_Kangxi_Radicals
In_Ideographic_Description_Characters
In_CJK_Symbols_and_Punctuation
In_Hiragana
In_Katakana
In_Bopomofo
In_Hangul_Compatibility_Jamo
In_Kanbun
In_Bopomofo_Extended
In_CJK_Strokes
In_Katakana_Phonetic_Extensions
In_Enclosed_CJK_Letters_and_Months
In_CJK_Compatibility
In_CJK_Unified_Ideographs_Extension_A
In_Yijing_Hexagram_Symbols
In_CJK_Unified_Ideographs
In_Yi_Syllables
In_Yi_Radicals
In_Lisu
In_Vai
In_Cyrillic_Extended_B
In_Bamum
In_Modifier_Tone_Letters
In_Latin_Extended_D
In_Syloti_Nagri
In_Common_Indic_Number_Forms
In_Phags_pa
In_Saurashtra
In_Devanagari_Extended
In_Kayah_Li
In_Rejang
In_Hangul_Jamo_Extended_A
In_Javanese
In_Cham
In_Myanmar_Extended_A
In_Tai_Viet
In_Meetei_Mayek_Extensions
In_Ethiopic_Extended_A
In_Meetei_Mayek
In_Hangul_Syllables
In_Hangul_Jamo_Extended_B
In_High_Surrogates
In_High_Private_Use_Surrogates
In_Low_Surrogates
In_Private_Use_Area
In_CJK_Compatibility_Ideographs
In_Alphabetic_Presentation_Forms
In_Arabic_Presentation_Forms_A
In_Variation_Selectors
In_Vertical_Forms
In_Combining_Half_Marks
In_CJK_Compatibility_Forms
In_Small_Form_Variants
In_Arabic_Presentation_Forms_B
In_Halfwidth_and_Fullwidth_Forms
In_Specials
In_Linear_B_Syllabary
In_Linear_B_Ideograms
In_Aegean_Numbers
In_Ancient_Greek_Numbers
In_Ancient_Symbols
In_Phaistos_Disc
In_Lycian
In_Carian
In_Old_Italic
In_Gothic
In_Ugaritic
In_Old_Persian
In_Deseret
In_Shavian
In_Osmanya
In_Cypriot_Syllabary
In_Imperial_Aramaic
In_Phoenician
In_Lydian
In_Meroitic_Hieroglyphs
In_Meroitic_Cursive
In_Kharoshthi
In_Old_South_Arabian
In_Avestan
In_Inscriptional_Parthian
In_Inscriptional_Pahlavi
In_Old_Turkic
In_Rumi_Numeral_Symbols
In_Brahmi
In_Kaithi
In_Sora_Sompeng
In_Chakma
In_Sharada
In_Takri
In_Cuneiform
In_Cuneiform_Numbers_and_Punctuation
In_Egyptian_Hieroglyphs
In_Bamum_Supplement
In_Miao
In_Kana_Supplement
In_Byzantine_Musical_Symbols
In_Musical_Symbols
In_Ancient_Greek_Musical_Notation
In_Tai_Xuan_Jing_Symbols
In_Counting_Rod_Numerals
In_Mathematical_Alphanumeric_Symbols
In_Arabic_Mathematical_Alphabetic_Symbols
In_Mahjong_Tiles
In_Domino_Tiles
In_Playing_Cards
In_Enclosed_Alphanumeric_Supplement
In_Enclosed_Ideographic_Supplement
In_Miscellaneous_Symbols_And_Pictographs
In_Emoticons
In_Transport_And_Map_Symbols
In_Alchemical_Symbols
In_CJK_Unified_Ideographs_Extension_B
In_CJK_Unified_Ideographs_Extension_C
In_CJK_Unified_Ideographs_Extension_D
In_CJK_Compatibility_Ideographs_Supplement
In_Tags
In_Variation_Selectors_Supplement
In_Supplementary_Private_Use_Area_A
In_Supplementary_Private_Use_Area_B
In_No_Block

51
src/Onigmo/enc/ascii.c Normal file
View File

@ -0,0 +1,51 @@
/**********************************************************************
ascii.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Encoding descriptor for US-ASCII.
 * The initializer is positional: entries must stay in the order in which
 * OnigEncodingType declares its members (the declaration is in the Onigmo
 * headers, not in this file).  Every callback is a generic onigenc_*
 * helper; this file defines no encoding-specific code of its own.
 */
OnigEncodingType OnigEncodingASCII = {
onigenc_single_byte_mbc_enc_len,
"US-ASCII", /* name */
1, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
onigenc_ascii_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

163
src/Onigmo/enc/big5.c Normal file
View File

@ -0,0 +1,163 @@
/**********************************************************************
big5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Byte length of a Big5 character, indexed by its first byte.
 * One row per high nibble (0x00, 0x10, ... 0xf0).  First bytes 0xa1..0xfe
 * start a 2-byte character; every other value is a 1-byte character.
 */
static const int EncLen_BIG5[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * Byte length of the Big5 character starting at p, decided solely by
 * looking the first byte up in EncLen_BIG5.
 */
static int
big5_mbc_enc_len(const UChar* p)
{
  const UChar first = p[0];

  return EncLen_BIG5[first];
}
/*
 * Decode the character at p (bounded by end) into a code point.
 * Pure delegation to the generic multibyte helper with the Big5 encoding.
 */
static OnigCodePoint
big5_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
}
/*
 * Encode `code` into buf; the return value is whatever the generic
 * two-byte helper returns for the Big5 encoding.
 */
static int
big5_code_to_mbc(OnigCodePoint code, UChar *buf)
{
return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf);
}
/*
 * Case-fold the character at *pp into `lower`.
 * Pure delegation to the generic multibyte case-fold helper with the Big5
 * encoding; all arguments are passed through unchanged.
 */
static int
big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag,
pp, end, lower);
}
#if 0
static int
big5_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
}
#endif
/*
 * Test whether `code` belongs to character class `ctype`.
 * Pure delegation to the generic two-byte helper with the Big5 encoding.
 */
static int
big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
}
/*
 * Non-zero when a byte value may appear as the second (trail) byte of a
 * Big5 character.  One row per high nibble.  Marked values are 0x40..0x7e
 * and 0x80..0xfe; 0x00..0x3f, 0x7f and 0xff are never trail bytes.
 */
static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
/* True when `byte` starts a multibyte character (its EncLen_BIG5 entry exceeds 1). */
#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1)
/* Non-zero when `byte` may be a trail byte according to BIG5_CAN_BE_TRAIL_TABLE. */
#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)]
/*
 * Return the start of the character that contains position s, never
 * moving before `start`.
 *
 * If *s can be a trail byte, scan backwards over the run of bytes that
 * can be lead bytes; the character boundary is then recovered from the
 * parity of the distance between that point and s.
 */
static UChar*
big5_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *head = s;
  int width;

  if (s <= start)
    return (UChar* )s;

  if (BIG5_ISMB_TRAIL(*head)) {
    /* step back while the preceding byte could open a 2-byte character */
    while (head > start && BIG5_ISMB_FIRST(head[-1]))
      head--;
  }

  width = enclen(ONIG_ENCODING_BIG5, head);
  if (head + width > s)
    return (UChar* )head;

  /* skip whole 2-byte units; an odd remainder means s is a trail byte */
  head += width;
  return (UChar* )(head + ((s - head) & ~1));
}
/*
 * A reverse match may start at s only when *s cannot be a Big5 trail
 * byte; `end` is not consulted.
 */
static int
big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
  if (BIG5_ISMB_TRAIL(*s))
    return FALSE;

  return TRUE;
}
/*
 * Encoding descriptor for Big5.
 * The initializer is positional (member order comes from OnigEncodingType,
 * declared in the Onigmo headers).  big5_* entries are defined in this
 * file; onigenc_* entries are shared generic helpers.
 */
OnigEncodingType OnigEncodingBIG5 = {
big5_mbc_enc_len,
"Big5", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
big5_mbc_to_code,
onigenc_mb2_code_to_mbclen,
big5_code_to_mbc,
big5_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
big5_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

201
src/Onigmo/enc/cp1251.c Normal file
View File

@ -0,0 +1,201 @@
/**********************************************************************
cp1251.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2006-2007 Byte <byte AT mail DOT kna DOT ru>
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Lower-case form of byte c, read from EncCP1251_ToLowerCaseTable; c must be 0..255. */
#define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c]
/* Non-zero when the EncCP1251_CtypeTable entry for `code` has the bit for `ctype` set; code must be 0..255. */
#define ENC_IS_CP1251_CTYPE(code,ctype) \
((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/*
 * Byte-to-lower-case map for CP1251, indexed by byte value (octal
 * literals, eight entries per row).  Bytes with no lower-case form map to
 * themselves.  'A'..'Z' map to 'a'..'z' and 0xc0..0xdf map to 0xe0..0xff;
 * a handful of bytes in 0x80..0xbf are remapped individually.
 */
static const UChar EncCP1251_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247',
'\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
'\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bit masks for CP1251, indexed by byte value (eight
 * entries per row).  ENC_IS_CP1251_CTYPE tests one bit of an entry via
 * CTYPE_TO_BIT(ctype); the bit assignment itself is defined by that macro
 * in the Onigmo headers, not in this file.
 */
static const unsigned short EncCP1251_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0,
0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2,
0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0,
0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
/*
 * Fold the single byte at *pp to lower case.
 * Writes exactly one byte to `lower`, advances *pp by one and returns 1
 * (the number of bytes written).  `flag` and `end` are ignored.
 */
static int
cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
		     const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar src = **pp;

  lower[0] = ENC_CP1251_TO_LOWER_CASE(src);
  *pp += 1;
  return 1;
}
/*
 * Test whether `code` belongs to character class `ctype`.
 * Code points of 256 and above are outside this single-byte encoding and
 * never match; the rest are looked up in EncCP1251_CtypeTable.
 */
static int
cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 256)
    return FALSE;

  return ENC_IS_CP1251_CTYPE(code, ctype);
}
/*
 * Case pairs handed to the onigenc_*_with_map helpers below.
 * In every entry the first value is the lower-case byte and the second
 * its upper-case counterpart, as given by EncCP1251_ToLowerCaseTable.
 * Covers 0xa8/0xb8 and the contiguous block 0xc0..0xdf / 0xe0..0xff.
 * NOTE(review): the other pairs that EncCP1251_ToLowerCaseTable remaps in
 * 0x80..0xbf (e.g. 0x80 -> 0x90) are not listed here -- confirm whether
 * that is intentional.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xb8, 0xa8 },
{ 0xe0, 0xc0 },
{ 0xe1, 0xc1 },
{ 0xe2, 0xc2 },
{ 0xe3, 0xc3 },
{ 0xe4, 0xc4 },
{ 0xe5, 0xc5 },
{ 0xe6, 0xc6 },
{ 0xe7, 0xc7 },
{ 0xe8, 0xc8 },
{ 0xe9, 0xc9 },
{ 0xea, 0xca },
{ 0xeb, 0xcb },
{ 0xec, 0xcc },
{ 0xed, 0xcd },
{ 0xee, 0xce },
{ 0xef, 0xcf },
{ 0xf0, 0xd0 },
{ 0xf1, 0xd1 },
{ 0xf2, 0xd2 },
{ 0xf3, 0xd3 },
{ 0xf4, 0xd4 },
{ 0xf5, 0xd5 },
{ 0xf6, 0xd6 },
{ 0xf7, 0xd7 },
{ 0xf8, 0xd8 },
{ 0xf9, 0xd9 },
{ 0xfa, 0xda },
{ 0xfb, 0xdb },
{ 0xfc, 0xdc },
{ 0xfd, 0xdd },
{ 0xfe, 0xde },
{ 0xff, 0xdf }
};
/*
 * Run callback f (with arg) over the case-fold pairs of this encoding.
 * Delegates to the generic map-driven helper, passing the element count
 * of CaseFoldMap, the map itself and 0 for the helper's third argument.
 */
static int
cp1251_apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, f, arg);
}
/*
 * Fill `items` with the case-fold alternatives for the text at p..end.
 * Delegates to the generic map-driven helper using CaseFoldMap; the
 * return value is whatever that helper returns.
 */
static int
cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, p, end, items);
}
/*
 * Encoding descriptor for CP1251.
 * The initializer is positional (member order comes from OnigEncodingType,
 * declared in the Onigmo headers).  cp1251_* entries are defined in this
 * file; onigenc_* entries are shared generic helpers.
 */
OnigEncodingType OnigEncodingCP1251 = {
onigenc_single_byte_mbc_enc_len,
"CP1251", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
cp1251_mbc_case_fold,
cp1251_apply_all_case_fold,
cp1251_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
cp1251_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

32
src/Onigmo/enc/cp932.c Normal file
View File

@ -0,0 +1,32 @@
/**********************************************************************
cp932.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define ENC_CP932
#include "sjis.c"

531
src/Onigmo/enc/euc_jp.c Normal file
View File

@ -0,0 +1,531 @@
/**********************************************************************
euc_jp.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/*
 * True when byte c lies OUTSIDE 0xa1..0xfe.  The subtraction is truncated
 * to UChar, so values below 0xa1 wrap around to large numbers and also
 * compare greater than (0xfe - 0xa1).  left_adjust_char_head() uses this
 * as the stop condition of its backward scan.
 */
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
/*
 * Byte length of an EUC-JP character, indexed by its first byte.
 * One row per high nibble.  0x8e starts a 2-byte and 0x8f a 3-byte
 * character; 0xa1..0xfe start 2-byte characters; everything else is a
 * single byte.
 */
static const int EncLen_EUCJP[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * Case pairs for the two-byte letters of EUC-JP, handed to
 * onigenc_apply_all_case_fold_with_map() by apply_all_case_fold().
 * In every entry the first value is the upper-case code and the second
 * its lower-case counterpart; the offsets (0x20 for fullwidth and Greek,
 * 0x30 for Cyrillic) match get_lower_case() / get_upper_case().
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
/* Fullwidth Alphabet */
{ 0xa3c1, 0xa3e1 },
{ 0xa3c2, 0xa3e2 },
{ 0xa3c3, 0xa3e3 },
{ 0xa3c4, 0xa3e4 },
{ 0xa3c5, 0xa3e5 },
{ 0xa3c6, 0xa3e6 },
{ 0xa3c7, 0xa3e7 },
{ 0xa3c8, 0xa3e8 },
{ 0xa3c9, 0xa3e9 },
{ 0xa3ca, 0xa3ea },
{ 0xa3cb, 0xa3eb },
{ 0xa3cc, 0xa3ec },
{ 0xa3cd, 0xa3ed },
{ 0xa3ce, 0xa3ee },
{ 0xa3cf, 0xa3ef },
{ 0xa3d0, 0xa3f0 },
{ 0xa3d1, 0xa3f1 },
{ 0xa3d2, 0xa3f2 },
{ 0xa3d3, 0xa3f3 },
{ 0xa3d4, 0xa3f4 },
{ 0xa3d5, 0xa3f5 },
{ 0xa3d6, 0xa3f6 },
{ 0xa3d7, 0xa3f7 },
{ 0xa3d8, 0xa3f8 },
{ 0xa3d9, 0xa3f9 },
{ 0xa3da, 0xa3fa },
/* Greek */
{ 0xa6a1, 0xa6c1 },
{ 0xa6a2, 0xa6c2 },
{ 0xa6a3, 0xa6c3 },
{ 0xa6a4, 0xa6c4 },
{ 0xa6a5, 0xa6c5 },
{ 0xa6a6, 0xa6c6 },
{ 0xa6a7, 0xa6c7 },
{ 0xa6a8, 0xa6c8 },
{ 0xa6a9, 0xa6c9 },
{ 0xa6aa, 0xa6ca },
{ 0xa6ab, 0xa6cb },
{ 0xa6ac, 0xa6cc },
{ 0xa6ad, 0xa6cd },
{ 0xa6ae, 0xa6ce },
{ 0xa6af, 0xa6cf },
{ 0xa6b0, 0xa6d0 },
{ 0xa6b1, 0xa6d1 },
{ 0xa6b2, 0xa6d2 },
{ 0xa6b3, 0xa6d3 },
{ 0xa6b4, 0xa6d4 },
{ 0xa6b5, 0xa6d5 },
{ 0xa6b6, 0xa6d6 },
{ 0xa6b7, 0xa6d7 },
{ 0xa6b8, 0xa6d8 },
/* Cyrillic */
{ 0xa7a1, 0xa7d1 },
{ 0xa7a2, 0xa7d2 },
{ 0xa7a3, 0xa7d3 },
{ 0xa7a4, 0xa7d4 },
{ 0xa7a5, 0xa7d5 },
{ 0xa7a6, 0xa7d6 },
{ 0xa7a7, 0xa7d7 },
{ 0xa7a8, 0xa7d8 },
{ 0xa7a9, 0xa7d9 },
{ 0xa7aa, 0xa7da },
{ 0xa7ab, 0xa7db },
{ 0xa7ac, 0xa7dc },
{ 0xa7ad, 0xa7dd },
{ 0xa7ae, 0xa7de },
{ 0xa7af, 0xa7df },
{ 0xa7b0, 0xa7e0 },
{ 0xa7b1, 0xa7e1 },
{ 0xa7b2, 0xa7e2 },
{ 0xa7b3, 0xa7e3 },
{ 0xa7b4, 0xa7e4 },
{ 0xa7b5, 0xa7e5 },
{ 0xa7b6, 0xa7e6 },
{ 0xa7b7, 0xa7e7 },
{ 0xa7b8, 0xa7e8 },
{ 0xa7b9, 0xa7e9 },
{ 0xa7ba, 0xa7ea },
{ 0xa7bb, 0xa7eb },
{ 0xa7bc, 0xa7ec },
{ 0xa7bd, 0xa7ed },
{ 0xa7be, 0xa7ee },
{ 0xa7bf, 0xa7ef },
{ 0xa7c0, 0xa7f0 },
{ 0xa7c1, 0xa7f1 },
};
/*
 * Byte length of the EUC-JP character starting at p, decided solely by
 * looking the first byte up in EncLen_EUCJP.
 */
static int
mbc_enc_len(const UChar* p)
{
  const UChar first = p[0];

  return EncLen_EUCJP[first];
}
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
len = mbc_enc_len(p);
n = (OnigCodePoint )*p++;
if (len == 1) return n;
for (i = 1; i < len; i++) {
if (p >= end) break;
c = *p++;
n <<= 8; n += c;
}
return n;
}
/*
 * Number of bytes needed to encode `code`:
 *   1 for ASCII,
 *   3 when the three low bytes all have the high bit set and the top
 *     byte is zero,
 *   2 when the two low bytes have the high bit set and the upper half is
 *     zero.
 * Any other value yields ONIGERR_INVALID_CODE_POINT_VALUE.
 */
static int
code_to_mbclen(OnigCodePoint code)
{
  if (ONIGENC_IS_CODE_ASCII(code))
    return 1;

  /* the 3-byte test must come first: its mask is a superset of the 2-byte one */
  if ((code & 0xff808080) == 0x00808080)
    return 3;

  if ((code & 0xffff8080) == 0x00008080)
    return 2;

  return ONIGERR_INVALID_CODE_POINT_VALUE;
}
/*
 * Compiled out by "#if 0": kept in the source but never built.
 * Would return the most significant non-zero byte among the low three
 * bytes of `code`, or `code` itself when both upper bytes are zero.
 */
#if 0
static int
code_to_mbc_first(OnigCodePoint code)
{
int first;
if ((code & 0xff0000) != 0) {
first = (code >> 16) & 0xff;
}
else if ((code & 0xff00) != 0) {
first = (code >> 8) & 0xff;
}
else {
return (int )code;
}
return first;
}
#endif
/*
 * Encode `code` into buf, most significant byte first, skipping the
 * third and second bytes when they are zero.
 *
 * Returns the number of bytes written, or
 * ONIGERR_INVALID_CODE_POINT_VALUE when that count disagrees with the
 * length implied by the first byte written.  buf is written even when an
 * error is returned.
 */
static int
code_to_mbc(OnigCodePoint code, UChar *buf)
{
  int n = 0;

  if ((code & 0xff0000) != 0)
    buf[n++] = (UChar )((code >> 16) & 0xff);
  if ((code & 0xff00) != 0)
    buf[n++] = (UChar )((code >> 8) & 0xff);
  buf[n++] = (UChar )(code & 0xff);

  /* the lead byte must announce exactly the length that was produced */
  if (mbc_enc_len(buf) != n)
    return ONIGERR_INVALID_CODE_POINT_VALUE;

  return n;
}
/*
 * Run callback f (with arg) over the case-fold pairs of this encoding.
 * Delegates to the generic map-driven helper, passing the element count
 * of CaseFoldMap, the map itself and 0 for the helper's third argument.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, f, arg);
}
/*
 * Map an upper-case two-byte letter to its lower-case code.
 * Fullwidth Latin (0xa3c1..0xa3da) and Greek (0xa6a1..0xa6b8) shift by
 * 0x20, Cyrillic (0xa7a1..0xa7c1) by 0x30.  Any other code is returned
 * unchanged.
 */
static OnigCodePoint
get_lower_case(OnigCodePoint code)
{
  OnigCodePoint shift = 0;

  if (ONIGENC_IS_IN_RANGE(code, 0xa3c1, 0xa3da) ||	/* Fullwidth Alphabet */
      ONIGENC_IS_IN_RANGE(code, 0xa6a1, 0xa6b8)) {	/* Greek */
    shift = 0x0020;
  }
  else if (ONIGENC_IS_IN_RANGE(code, 0xa7a1, 0xa7c1)) {	/* Cyrillic */
    shift = 0x0030;
  }

  return (OnigCodePoint )(code + shift);
}
/*
 * Map a lower-case two-byte letter to its upper-case code.
 * Fullwidth Latin (0xa3e1..0xa3fa) and Greek (0xa6c1..0xa6d8) shift by
 * 0x20, Cyrillic (0xa7d1..0xa7f1) by 0x30.  Any other code is returned
 * unchanged.
 */
static OnigCodePoint
get_upper_case(OnigCodePoint code)
{
  OnigCodePoint shift = 0;

  if (ONIGENC_IS_IN_RANGE(code, 0xa3e1, 0xa3fa) ||	/* Fullwidth Alphabet */
      ONIGENC_IS_IN_RANGE(code, 0xa6c1, 0xa6d8)) {	/* Greek */
    shift = 0x0020;
  }
  else if (ONIGENC_IS_IN_RANGE(code, 0xa7d1, 0xa7f1)) {	/* Cyrillic */
    shift = 0x0030;
  }

  return (OnigCodePoint )(code - shift);
}
/*
 * Report the case-fold alternative for the character at p.
 *
 * ASCII characters are handed to the generic ASCII helper.  For a
 * multibyte character, at most one item is produced: the lower-case
 * counterpart if the character has one, otherwise the upper-case
 * counterpart.  Returns the number of items written (0 or 1 for
 * multibyte input).
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
			   const OnigUChar* p, const OnigUChar* end,
			   OnigCaseFoldCodeItem items[])
{
  const OnigCodePoint code = mbc_to_code(p, end);
  OnigCodePoint partner;

  if (ONIGENC_IS_ASCII_CODE(code))
    return onigenc_ascii_get_case_fold_codes_by_str(flag, p, end, items);

  /* prefer the lower-case partner; fall back to the upper-case one */
  partner = get_lower_case(code);
  if (partner == code)
    partner = get_upper_case(code);
  if (partner == code)
    return 0;

  items[0].byte_len = mbc_enc_len(p);
  items[0].code_len = 1;
  items[0].code[0] = partner;
  return 1;
}
/*
 * Case-fold the character at *pp into `lower` and advance *pp past it.
 *
 * ASCII bytes are folded with the ASCII table.  A multibyte character is
 * decoded, mapped through get_lower_case() and re-encoded into `lower`.
 *
 * Returns the number of bytes written to `lower`, which is also the
 * distance *pp was advanced.  The result is always positive.
 *
 * code_to_mbc() rejects byte sequences whose length does not match their
 * lead byte (for example a multibyte character cut short by `end`).  Its
 * error code used to be added to *pp and returned as a length, which
 * moved the cursor backwards.  Such input is now consumed as one raw,
 * unfolded byte.
 */
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
	      const UChar** pp, const UChar* end, UChar* lower)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    (*pp)++;
    return 1;
  }
  else {
    OnigCodePoint code;
    int len;

    code = get_lower_case(mbc_to_code(p, end));
    len = code_to_mbc(code, lower);
    if (len == ONIGERR_INVALID_CODE_POINT_VALUE) {
      /* malformed or truncated sequence: pass the lead byte through */
      *lower = *p;
      len = 1;
    }
    (*pp) += len;
    return len; /* return byte length of converted char to lower */
  }
}
/*
 * Return the start of the character that contains position s, never
 * moving before `start`.
 *
 * In this encoding multibyte trail bytes do not mix with single bytes,
 * so the scan steps back over bytes in 0xa1..0xfe until it meets a byte
 * outside that range (or `start`), then recovers the boundary from the
 * parity of the remaining distance.
 */
static UChar*
left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *head;
  int width;

  if (s <= start)
    return (UChar* )s;

  for (head = s; head > start; head--) {
    if (eucjp_islead(*head))
      break;
  }

  width = mbc_enc_len(head);
  if (head + width > s)
    return (UChar* )head;

  /* skip whole 2-byte units; an odd remainder means s is a trail byte */
  head += width;
  return (UChar* )(head + ((s - head) & ~1));
}
/*
 * A reverse match may start at s when *s is at most 0x7e, or is one of
 * the lead bytes 0x8e / 0x8f; `end` is not consulted.
 */
static int
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
  const UChar first = *s;

  return (first <= 0x7e || first == 0x8e || first == 0x8f) ? TRUE : FALSE;
}
/*
 * File-local state for the named character properties of this encoding.
 * NOTE(review): apart from PropertyInited these are only written through
 * the PROPERTY_LIST_* macros, whose definitions are not in this file.
 */
static int PropertyInited = 0; /* set to 1 at the end of init_property_list() */
static const OnigCodePoint** PropertyList; /* range tables, indexed by (ctype - ONIGENC_MAX_STD_CTYPE - 1) */
static int PropertyListNum; /* upper bound used when validating that index */
static int PropertyListSize; /* presumably the allocated capacity of PropertyList -- TODO confirm */
static hash_table_type* PropertyNameTable; /* looked up by property name in property_name_to_ctype() */
/*
 * Code range table for the "hiragana" property.
 * Layout shared by all CR_* tables here: the first element is the number
 * of ranges, followed by that many (first, last) code pairs.
 * The upper bound is wider when ENC_EUC_JIS_2004 is defined.
 */
static const OnigCodePoint CR_Hiragana[] = {
1,
#ifdef ENC_EUC_JIS_2004
0xa4a1, 0xa4fb
#else
0xa4a1, 0xa4f3
#endif
}; /* CR_Hiragana */
/*
 * Code range table for the "katakana" property: a range count followed
 * by (first, last) pairs.  The ENC_EUC_JIS_2004 build lists five ranges,
 * the plain EUC-JP build three.
 */
#ifdef ENC_EUC_JIS_2004
static const OnigCodePoint CR_Katakana[] = {
5,
0x8ea6, 0x8eaf, /* JIS X 0201 Katakana */
0x8eb1, 0x8edd, /* JIS X 0201 Katakana */
0xa5a1, 0xa5fe,
0xa6ee, 0xa6fe,
0xa7f2, 0xa7f5,
}; /* CR_Katakana */
#else
static const OnigCodePoint CR_Katakana[] = {
3,
0x8ea6, 0x8eaf, /* JIS X 0201 Katakana */
0x8eb1, 0x8edd, /* JIS X 0201 Katakana */
0xa5a1, 0xa5f6,
}; /* CR_Katakana */
#endif
/*
 * Code range table for the "han" property: a range count followed by
 * (first, last) pairs.  Two- and three-byte codes (0x8f prefix) share
 * one table.  The contents depend on ENC_EUC_JIS_2004.
 */
#ifdef ENC_EUC_JIS_2004
static const OnigCodePoint CR_Han[] = {
/* EUC-JIS-2004 (JIS X 0213:2004) */
7,
/* plane 1 */
0xa1b8, 0xa1b8,
0xaea1, 0xfefe, /* Kanji level 1, 2 and 3 */
/* plane 2 */
0x8fa1a1, 0x8fa1fe, /* row 1 */
0x8fa3a1, 0x8fa5fe, /* row 3 .. 5 */
0x8fa8a1, 0x8fa8fe, /* row 8 */
0x8faca1, 0x8faffe, /* row 12 .. 15 */
0x8feea1, 0x8ffef6, /* row 78 .. 94 */
}; /* CR_Han */
#else
static const OnigCodePoint CR_Han[] = {
/* EUC-JP (JIS X 0208 based) */
4,
0xa1b8, 0xa1b8,
0xb0a1, 0xcfd3, /* Kanji level 1 */
0xd0a1, 0xf4a6, /* Kanji level 2 */
0x8fb0a1, 0x8fedf3 /* JIS X 0212 Supplemental Kanji (row 16 .. 77) */
}; /* CR_Han */
#endif
/*
 * Code range table for the "latin" property: a range count followed by
 * (first, last) pairs.  Covers ASCII letters and the fullwidth letters.
 */
static const OnigCodePoint CR_Latin[] = {
4,
0x0041, 0x005a,
0x0061, 0x007a,
0xa3c1, 0xa3da,
0xa3e1, 0xa3fa,
/* TODO: add row 8 .. 11 to support EUC-JIS-2004 */
/* TODO: add JIS X 0212 row 9 .. 11 */
}; /* CR_Latin */
/*
 * Code range table for the "greek" property: a range count followed by
 * (first, last) pairs.  The second range ends one code later when
 * ENC_EUC_JIS_2004 is defined.
 */
static const OnigCodePoint CR_Greek[] = {
2,
0xa6a1, 0xa6b8,
#ifdef ENC_EUC_JIS_2004
0xa6c1, 0xa6d9,
#else
0xa6c1, 0xa6d8,
/* TODO: add JIS X 0212 row 6 */
#endif
}; /* CR_Greek */
/*
 * Code range table for the "cyrillic" property: a range count followed
 * by (first, last) pairs, one for upper-case and one for lower-case
 * letters (the same ranges that get_lower_case/get_upper_case use).
 */
static const OnigCodePoint CR_Cyrillic[] = {
2,
0xa7a1, 0xa7c1,
0xa7d1, 0xa7f1,
/* TODO: add JIS X 0212 row 7 */
}; /* CR_Cyrillic */
/*
 * Register the six named properties of this encoding and mark the list
 * as initialized.
 * NOTE(review): PROPERTY_LIST_ADD_PROP is defined outside this file.  It
 * presumably assigns `r` and jumps to the `end` label on failure, which
 * is why `r` has no initializer and the label has no visible goto --
 * confirm against the macro definition.
 */
static int
init_property_list(void)
{
int r;
PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana);
PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana);
PROPERTY_LIST_ADD_PROP("han", CR_Han);
PROPERTY_LIST_ADD_PROP("latin", CR_Latin);
PROPERTY_LIST_ADD_PROP("greek", CR_Greek);
PROPERTY_LIST_ADD_PROP("cyrillic", CR_Cyrillic);
PropertyInited = 1;
end:
return r;
}
/*
 * Translate the property name in p..end into a ctype value.
 * The name is lower-cased (ASCII rules) into a temporary buffer and
 * looked up in PropertyNameTable; when the lookup returns 0 the
 * lower-cased name is passed to onigenc_minimum_property_name_to_ctype().
 */
static int
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
hash_data_type ctype;
UChar *s, *e;
/* NOTE(review): macro defined elsewhere; presumably runs init_property_list() on first use and may return early */
PROPERTY_LIST_INIT_CHECK;
/* NOTE(review): buffer size follows the caller-supplied name length; xalloca presumably allocates on the stack -- confirm the length is bounded upstream */
s = e = xalloca(end - p + 1);
for (; p < end; p++) {
*e++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
}
if (onig_st_lookup_strend(PropertyNameTable, s, e, &ctype) == 0) {
return onigenc_minimum_property_name_to_ctype(enc, s, e);
}
return (int )ctype;
}
/*
 * Test whether `code` belongs to character class `ctype`.
 *
 * Property ctypes (above ONIGENC_MAX_STD_CTYPE) are answered from the
 * matching range table in PropertyList; an index outside the list yields
 * ONIGERR_TYPE_BUG.  For standard ctypes, codes below 128 use the ASCII
 * table.  Other codes count only as word/graph/print, and only when they
 * are valid multibyte codes.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (ctype > ONIGENC_MAX_STD_CTYPE) {
    PROPERTY_LIST_INIT_CHECK;

    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
    if (ctype >= (unsigned int )PropertyListNum)
      return ONIGERR_TYPE_BUG;

    return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
  }

  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if (CTYPE_IS_WORD_GRAPH_PRINT(ctype))
    return (code_to_mbclen(code) > 1 ? TRUE : FALSE);

  return FALSE;
}
/*
 * Hand out the code range table for a property ctype.
 *
 * Standard ctypes are not served here (ONIG_NO_SUPPORT_CONFIG).  For a
 * property ctype, *sb_out is set to 0x80 and *ranges to the table from
 * PropertyList; an index outside the list yields ONIGERR_TYPE_BUG.
 * Returns 0 on success.
 */
static int
get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
		     const OnigCodePoint* ranges[])
{
  if (ctype <= ONIGENC_MAX_STD_CTYPE)
    return ONIG_NO_SUPPORT_CONFIG;

  *sb_out = 0x80;

  PROPERTY_LIST_INIT_CHECK;

  ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
  if (ctype >= (OnigCtype )PropertyListNum)
    return ONIGERR_TYPE_BUG;

  *ranges = PropertyList[ctype];
  return 0;
}
/*
 * Encoding descriptor for EUC-JP.
 * The initializer is positional (member order comes from OnigEncodingType,
 * declared in the Onigmo headers).  All callbacks except the newline test
 * are the static functions defined in this file.
 */
OnigEncodingType OnigEncodingEUC_JP = {
mbc_enc_len,
"EUC-JP", /* name */
3, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
mbc_to_code,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

160
src/Onigmo/enc/euc_kr.c Normal file
View File

@ -0,0 +1,160 @@
/**********************************************************************
euc_kr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Byte length of an EUC-KR character, indexed by its first byte.
 * One row per high nibble.  First bytes 0xa1..0xfe start a 2-byte
 * character; every other value is a 1-byte character.
 */
static const int EncLen_EUCKR[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * Byte length of the EUC-KR character starting at p, decided solely by
 * looking the first byte up in EncLen_EUCKR.
 */
static int
euckr_mbc_enc_len(const UChar* p)
{
  const UChar first = p[0];

  return EncLen_EUCKR[first];
}
/*
 * Decode the character at p (bounded by end) into a code point.
 * Pure delegation to the generic multibyte helper with the EUC-KR encoding.
 */
static OnigCodePoint
euckr_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
}
/* Thin wrapper: forwards code and the output buffer buf to
 * onigenc_mb2_code_to_mbc() for ONIG_ENCODING_EUC_KR and returns its result
 * unchanged. */
static int
euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
{
return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf);
}
/* Thin wrapper: forwards all arguments to onigenc_mbn_mbc_case_fold() for
 * ONIG_ENCODING_EUC_KR and returns its result unchanged. */
static int
euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag,
pp, end, lower);
}
/* Compiled out by "#if 0": a wrapper around onigenc_mbn_is_mbc_ambiguous()
 * for ONIG_ENCODING_EUC_KR that is not part of the build. */
#if 0
static int
euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
}
#endif
/* Thin wrapper: asks onigenc_mb2_is_code_ctype() whether code belongs to the
 * character class ctype under ONIG_ENCODING_EUC_KR and returns its answer. */
static int
euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
}
#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff)
static UChar*
euckr_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
*/
const UChar *p;
int len;
if (s <= start) return (UChar* )s;
p = s;
while (!euckr_islead(*p) && p > start) p--;
len = enclen(ONIG_ENCODING_EUC_KR, p);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
}
/* Return TRUE when the byte at s is 0x7e or below, FALSE otherwise.
 * The end argument is not used. */
static int
euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
  return (*s <= 0x7e) ? TRUE : FALSE;
}
/* Encoding descriptor for EUC-KR (characters of 1 to 2 bytes).  It combines
 * the euckr_* callbacks defined in this file with generic onigenc_* helpers;
 * the role of each slot is fixed by the OnigEncodingType declaration, which
 * is not visible in this file. */
OnigEncodingType OnigEncodingEUC_KR = {
euckr_mbc_enc_len,
"EUC-KR", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};
/* Encoding descriptor for EUC-CN.  Identical to OnigEncodingEUC_KR except
 * for the name: it reuses every euckr_* callback. */
OnigEncodingType OnigEncodingEUC_CN = {
euckr_mbc_enc_len,
"EUC-CN", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

130
src/Onigmo/enc/euc_tw.c Normal file
View File

@ -0,0 +1,130 @@
/**********************************************************************
euc_tw.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Character byte length keyed by the value of the first byte:
 * 4 for 0x8e, 2 for 0xa1-0xfe, 1 for every other value. */
static const int EncLen_EUCTW[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* Return the byte length of the character starting at p.  The length is
 * looked up in EncLen_EUCTW from the first byte alone; no later byte is
 * examined. */
static int
euctw_mbc_enc_len(const UChar* p)
{
return EncLen_EUCTW[*p];
}
/* Thin wrapper: forwards p and end to onigenc_mbn_mbc_to_code() for
 * ONIG_ENCODING_EUC_TW and returns the code point it produces. */
static OnigCodePoint
euctw_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
}
/* Thin wrapper: forwards code and the output buffer buf to
 * onigenc_mb4_code_to_mbc() for ONIG_ENCODING_EUC_TW and returns its result
 * unchanged. */
static int
euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
{
return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf);
}
/* Thin wrapper: forwards all arguments to onigenc_mbn_mbc_case_fold() for
 * ONIG_ENCODING_EUC_TW and returns its result unchanged. */
static int
euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag,
pp, end, lower);
}
/* Thin wrapper: asks onigenc_mb4_is_code_ctype() whether code belongs to the
 * character class ctype under ONIG_ENCODING_EUC_TW and returns its answer. */
static int
euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
}
#define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
static UChar*
euctw_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
*/
const UChar *p;
int len;
if (s <= start) return (UChar* )s;
p = s;
while (!euctw_islead(*p) && p > start) p--;
len = enclen(ONIG_ENCODING_EUC_TW, p);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
}
/* Return TRUE when the byte at s is 0x7e or below, FALSE otherwise.
 * The end argument is not used. */
static int
euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
  return (*s <= 0x7e) ? TRUE : FALSE;
}
/* Encoding descriptor for EUC-TW (characters of 1 to 4 bytes).  It combines
 * the euctw_* callbacks defined in this file with generic onigenc_* helpers;
 * the role of each slot is fixed by the OnigEncodingType declaration, which
 * is not visible in this file. */
OnigEncodingType OnigEncodingEUC_TW = {
euctw_mbc_enc_len,
"EUC-TW", /* name */
4, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euctw_mbc_to_code,
onigenc_mb4_code_to_mbclen,
euctw_code_to_mbc,
euctw_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euctw_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euctw_left_adjust_char_head,
euctw_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

496
src/Onigmo/enc/gb18030.c Normal file
View File

@ -0,0 +1,496 @@
/**********************************************************************
gb18030.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2005-2007 KUBO Takehiro <kubo AT jiubao DOT org>
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Debug trace hook.  With the "#if 1" branch active, DEBUG_GB18030(arg)
 * expands to nothing; change it to "#if 0" to pass the parenthesised
 * argument list to printf instead. */
#if 1
#define DEBUG_GB18030(arg)
#else
#define DEBUG_GB18030(arg) printf arg
#endif
/* Byte classes stored in GB18030_MAP: each value names the positions a byte
 * of that value may occupy within a character. */
enum {
C1, /* one-byte char */
C2, /* one-byte or second of two-byte char */
C4, /* one-byte or second or fourth of four-byte char */
CM /* first of two- or four-byte char or second of two-byte char */
};
/* Byte class (C1, C2, C4 or CM) for every byte value:
 *   0x00-0x2f, 0x3a-0x3f, 0x7f, 0xff -> C1
 *   0x30-0x39                        -> C4
 *   0x40-0x7e, 0x80                  -> C2
 *   0x81-0xfe                        -> CM */
static const char GB18030_MAP[] = {
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1,
C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1
};
/*
 * Return the byte length (1, 2 or 4) of the character starting at p.
 *
 * A first byte that is not of class CM is a one-byte character.  Otherwise
 * the class of the second byte decides: C4 means a four-byte character, C1
 * cannot follow a CM byte so the CM byte is reported as length 1, and any
 * other class means a two-byte character.  Note that p[1] is read whenever
 * p[0] is of class CM; the function has no end pointer to bound that read.
 */
static int
gb18030_mbc_enc_len(const UChar* p)
{
  if (GB18030_MAP[p[0]] != CM)
    return 1;

  switch (GB18030_MAP[p[1]]) {
  case C4:
    return 4;
  case C1:
    return 1; /* illegal sequence */
  default:
    return 2;
  }
}
/* Thin wrapper: forwards p and end to onigenc_mbn_mbc_to_code() for
 * ONIG_ENCODING_GB18030 and returns the code point it produces. */
static OnigCodePoint
gb18030_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end);
}
/* Thin wrapper: forwards code and the output buffer buf to
 * onigenc_mb4_code_to_mbc() for ONIG_ENCODING_GB18030 and returns its result
 * unchanged. */
static int
gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
{
return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf);
}
/* Thin wrapper: forwards all arguments to onigenc_mbn_mbc_case_fold() for
 * ONIG_ENCODING_GB18030 and returns its result unchanged. */
static int
gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_GB18030, flag,
pp, end, lower);
}
/* Compiled out by "#if 0": a wrapper around onigenc_mbn_is_mbc_ambiguous()
 * for ONIG_ENCODING_GB18030 that is not part of the build. */
#if 0
static int
gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
}
#endif
/* Thin wrapper: asks onigenc_mb4_is_code_ctype() whether code belongs to the
 * character class ctype under ONIG_ENCODING_GB18030 and returns its answer. */
static int
gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype);
}
/* States of the backward scan performed by gb18030_left_adjust_char_head().
 * Each name summarises the byte classes consumed so far; "one", "odd" and
 * "even" appear to count how many bytes or pairs of that kind have been
 * seen (see the per-case comments in that function for sample sequences). */
enum state {
S_START,
S_one_C2,
S_one_C4,
S_one_CM,
S_odd_CM_one_CX,
S_even_CM_one_CX,
/* CMC4 : pair of "CM C4" */
S_one_CMC4,
S_odd_CMC4,
S_one_C4_odd_CMC4,
S_even_CMC4,
S_one_C4_even_CMC4,
S_odd_CM_odd_CMC4,
S_even_CM_odd_CMC4,
S_odd_CM_even_CMC4,
S_even_CM_even_CMC4,
/* C4CM : pair of "C4 CM" */
S_odd_C4CM,
S_one_CM_odd_C4CM,
S_even_C4CM,
S_one_CM_even_C4CM,
S_even_CM_odd_C4CM,
S_odd_CM_odd_C4CM,
S_even_CM_even_C4CM,
S_odd_CM_even_C4CM,
};
/*
 * Return the first byte of the character that contains s, examining no byte
 * before start.
 *
 * A GB18030 byte cannot be classified in isolation (see the C1/C2/C4/CM
 * classes), so the text is scanned backwards from s through a state machine
 * until a byte settles the character boundaries or start is reached.  The
 * comment beside each case shows the byte classes seen so far in text order,
 * with s last.  The result is always one of s, s - 1, s - 2 or s - 3.
 *
 * The scan stops when p equals start instead of decrementing p below start:
 * forming a pointer before the beginning of the buffer is undefined
 * behaviour even if it is never dereferenced.
 */
static UChar*
gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *p;
  enum state state = S_START;

  DEBUG_GB18030(("----------------\n"));

  /* Nothing to scan: same answer the S_START exit below would give. */
  if (s < start)
    return (UChar *)s;

  for (p = s; ; p--) {
    DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
    switch (state) {
    case S_START:
      switch (GB18030_MAP[*p]) {
      case C1:
	return (UChar *)s;
      case C2:
	state = S_one_C2; /* C2 */
	break;
      case C4:
	state = S_one_C4; /* C4 */
	break;
      case CM:
	state = S_one_CM; /* CM */
	break;
      }
      break;
    case S_one_C2: /* C2 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)s;
      case CM:
	state = S_odd_CM_one_CX; /* CM C2 */
	break;
      }
      break;
    case S_one_C4: /* C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)s;
      case CM:
	state = S_one_CMC4;
	break;
      }
      break;
    case S_one_CM: /* CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
	return (UChar *)s;
      case C4:
	state = S_odd_C4CM;
	break;
      case CM:
	state = S_odd_CM_one_CX; /* CM CM */
	break;
      }
      break;

    case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 1);
      case CM:
	state = S_even_CM_one_CX;
	break;
      }
      break;
    case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)s;
      case CM:
	state = S_odd_CM_one_CX;
	break;
      }
      break;

    case S_one_CMC4: /* CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
	return (UChar *)(s - 1);
      case C4:
	state = S_one_C4_odd_CMC4; /* C4 CM C4 */
	break;
      case CM:
	state = S_even_CM_one_CX; /* CM CM C4 */
	break;
      }
      break;
    case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
	return (UChar *)(s - 1);
      case C4:
	state = S_one_C4_odd_CMC4;
	break;
      case CM:
	state = S_odd_CM_odd_CMC4;
	break;
      }
      break;
    case S_one_C4_odd_CMC4: /* C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 1);
      case CM:
	state = S_even_CMC4; /* CM C4 CM C4 */
	break;
      }
      break;
    case S_even_CMC4: /* CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
	return (UChar *)(s - 3);
      case C4:
	state = S_one_C4_even_CMC4;
	break;
      case CM:
	state = S_odd_CM_even_CMC4;
	break;
      }
      break;
    case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 3);
      case CM:
	state = S_odd_CMC4;
	break;
      }
      break;

    case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 3);
      case CM:
	state = S_even_CM_odd_CMC4;
	break;
      }
      break;
    case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 1);
      case CM:
	state = S_odd_CM_odd_CMC4;
	break;
      }
      break;

    case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 1);
      case CM:
	state = S_even_CM_even_CMC4;
	break;
      }
      break;
    case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 3);
      case CM:
	state = S_odd_CM_even_CMC4;
	break;
      }
      break;

    case S_odd_C4CM: /* C4 CM */  /* C4 CM C4 CM C4 CM*/
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)s;
      case CM:
	state = S_one_CM_odd_C4CM; /* CM C4 CM */
	break;
      }
      break;
    case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
	return (UChar *)(s - 2); /* |CM C4 CM */
      case C4:
	state = S_even_C4CM;
	break;
      case CM:
	state = S_even_CM_odd_C4CM;
	break;
      }
      break;
    case S_even_C4CM: /* C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 2);  /* C4|CM C4 CM */
      case CM:
	state = S_one_CM_even_C4CM;
	break;
      }
      break;
    case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
	return (UChar *)(s - 0);  /*|CM C4 CM C4|CM */
      case C4:
	state = S_odd_C4CM;
	break;
      case CM:
	state = S_even_CM_even_C4CM;
	break;
      }
      break;

    case S_even_CM_odd_C4CM: /* CM CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 0); /* |CM CM|C4|CM */
      case CM:
	state = S_odd_CM_odd_C4CM;
	break;
      }
      break;
    case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
      case CM:
	state = S_even_CM_odd_C4CM;
	break;
      }
      break;

    case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
      case CM:
	state = S_odd_CM_even_C4CM;
	break;
      }
      break;
    case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
	return (UChar *)(s - 0);  /* |CM CM|CM C4 CM C4|CM */
      case CM:
	state = S_even_CM_even_C4CM;
	break;
      }
      break;
    }

    /* The byte at start has been consumed: stop without stepping below it. */
    if (p == start)
      break;
  }

  /* The scan reached start undecided: resolve from the state alone. */
  DEBUG_GB18030(("state %d\n", state));
  switch (state) {
  case S_START:             return (UChar *)(s - 0);
  case S_one_C2:            return (UChar *)(s - 0);
  case S_one_C4:            return (UChar *)(s - 0);
  case S_one_CM:            return (UChar *)(s - 0);

  case S_odd_CM_one_CX:     return (UChar *)(s - 1);
  case S_even_CM_one_CX:    return (UChar *)(s - 0);

  case S_one_CMC4:          return (UChar *)(s - 1);
  case S_odd_CMC4:          return (UChar *)(s - 1);
  case S_one_C4_odd_CMC4:   return (UChar *)(s - 1);
  case S_even_CMC4:         return (UChar *)(s - 3);
  case S_one_C4_even_CMC4:  return (UChar *)(s - 3);

  case S_odd_CM_odd_CMC4:   return (UChar *)(s - 3);
  case S_even_CM_odd_CMC4:  return (UChar *)(s - 1);

  case S_odd_CM_even_CMC4:  return (UChar *)(s - 1);
  case S_even_CM_even_CMC4: return (UChar *)(s - 3);

  case S_odd_C4CM:          return (UChar *)(s - 0);
  case S_one_CM_odd_C4CM:   return (UChar *)(s - 2);
  case S_even_C4CM:         return (UChar *)(s - 2);
  case S_one_CM_even_C4CM:  return (UChar *)(s - 0);

  case S_even_CM_odd_C4CM:  return (UChar *)(s - 0);
  case S_odd_CM_odd_C4CM:   return (UChar *)(s - 2);
  case S_even_CM_even_C4CM: return (UChar *)(s - 2);
  case S_odd_CM_even_C4CM:  return (UChar *)(s - 0);
  }

  return (UChar* )s;  /* never come here. (escape warning) */
}
/* Return TRUE only when the byte at s has class C1 in GB18030_MAP, FALSE
 * otherwise.  The end argument is not used. */
static int
gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
  if (GB18030_MAP[*s] != C1)
    return FALSE;
  return TRUE;
}
/* Encoding descriptor for GB18030 (characters of 1 to 4 bytes).  It combines
 * the gb18030_* callbacks defined in this file with generic onigenc_*
 * helpers; the role of each slot is fixed by the OnigEncodingType
 * declaration, which is not visible in this file. */
OnigEncodingType OnigEncodingGB18030 = {
gb18030_mbc_enc_len,
"GB18030", /* name */
4, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
gb18030_mbc_to_code,
onigenc_mb4_code_to_mbclen,
gb18030_code_to_mbc,
gb18030_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
gb18030_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
gb18030_left_adjust_char_head,
gb18030_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

273
src/Onigmo/enc/iso8859_1.c Normal file
View File

@ -0,0 +1,273 @@
/**********************************************************************
iso8859_1.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Non-zero when the table entry for byte `code` has the bit selected by
 * CTYPE_TO_BIT(ctype) set.  `code` indexes the table directly, so callers
 * must keep it below 256. */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/* Character-class bit mask for each of the 256 ISO-8859-1 byte values.  The
 * meaning of the individual bits is defined by CTYPE_TO_BIT, which is
 * declared outside this file. */
static const unsigned short EncISO_8859_1_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
/* Case-fold pairs for the non-ASCII letters: every byte in 0xc0-0xde except
 * 0xd7 is paired with the byte 0x20 above it (presumably {upper, lower};
 * the field names of OnigPairCaseFoldCodes are not visible here). */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
/* Hand CaseFoldMap (with its element count) together with flag, the callback
 * f and its argument arg to onigenc_apply_all_case_fold_with_map(), and
 * return that helper's result.
 * NOTE(review): the literal 1 passed after the map presumably switches on
 * the helper's handling of 0xdf (sharp s) -- confirm against the helper. */
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
/*
 * Fill items[] with the case-fold alternatives of the text starting at p
 * and return how many entries were written (0, 1, 2 or 4).
 *
 *  - ASCII letter: one entry holding the opposite-case letter; when the
 *    letter is 's'/'S', a second byte exists before end and it is also
 *    's'/'S', a second entry maps the two bytes to 0xdf.
 *  - 0xdf: four two-code entries, "ss", "SS", "sS" and "Ss", in that order.
 *  - 0xc0-0xde except 0xd7: one entry, the byte plus 0x20.
 *  - 0xe0-0xfe except 0xf7: one entry, the byte minus 0x20.
 *  - anything else: no entries.
 *
 * The flag argument is not used.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
			   const OnigUChar* p, const OnigUChar* end,
			   OnigCaseFoldCodeItem items[])
{
  /* Expansions reported for 0xdf, in output order. */
  static const char sharp_s_folds[4][2] = {
    { 's', 's' }, { 'S', 'S' }, { 's', 'S' }, { 'S', 's' }
  };
  const OnigUChar c = *p;
  const int ascii_upper = (0x41 <= c && c <= 0x5a);
  const int ascii_lower = (0x61 <= c && c <= 0x7a);
  int i;

  if (ascii_upper || ascii_lower) {
    items[0].byte_len = 1;
    items[0].code_len = 1;
    items[0].code[0] = (OnigCodePoint )(ascii_upper ? c + 0x20 : c - 0x20);

    /* "ss" in any mix of cases also folds to the single byte 0xdf. */
    if ((c == 0x53 || c == 0x73) && end > p + 1
	&& (p[1] == 0x53 || p[1] == 0x73)) {
      items[1].byte_len = 2;
      items[1].code_len = 1;
      items[1].code[0] = (OnigCodePoint )0xdf;
      return 2;
    }
    return 1;
  }

  if (c == 0xdf) {
    for (i = 0; i < 4; i++) {
      items[i].byte_len = 1;
      items[i].code_len = 2;
      items[i].code[0] = (OnigCodePoint )sharp_s_folds[i][0];
      items[i].code[1] = (OnigCodePoint )sharp_s_folds[i][1];
    }
    return 4;
  }

  /* Bytes below 0xc0, plus 0xd7, 0xf7 and 0xff, have no case partner. */
  if (c < 0xc0 || c == 0xd7 || c == 0xf7 || c == 0xff)
    return 0;

  items[0].byte_len = 1;
  items[0].code_len = 1;
  items[0].code[0] = (OnigCodePoint )(c < 0xe0 ? c + 0x20 : c - 0x20);
  return 1;
}
/*
 * Case-fold the single byte at *pp into lower, advance *pp by one byte and
 * return the number of bytes written.
 *
 * 0xdf becomes the two bytes "ss" when flag contains
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR; every other byte (and 0xdf without
 * that flag) is mapped through ONIGENC_ISO_8859_1_TO_LOWER_CASE.  The end
 * argument is not used.
 */
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
	      const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar ch = **pp;
  int written;

  if (ch == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written;
}
/* Compiled out by "#if 0"; kept for reference only.
 * NOTE(review): "(v | BIT_CTYPE_LOWER) != 0" is true whenever
 * BIT_CTYPE_LOWER is non-zero, so the final return looks unreachable;
 * "&" was probably intended.  Confirm before re-enabling this code. */
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (*p >= 0xaa && *p <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
/* Report whether code belongs to character class ctype.  Codes of 256 and
 * above are outside the table and always yield FALSE; smaller codes are
 * tested with ENC_IS_ISO_8859_1_CTYPE. */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 256)
    return FALSE;

  return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
}
/* Encoding descriptor for ISO-8859-1 (every character is one byte).  It
 * combines the static callbacks defined in this file with generic
 * onigenc_single_byte_* helpers; the role of each slot is fixed by the
 * OnigEncodingType declaration, which is not visible in this file. */
OnigEncodingType OnigEncodingISO_8859_1 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-1", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

240
src/Onigmo/enc/iso8859_10.c Normal file
View File

@ -0,0 +1,240 @@
/**********************************************************************
iso8859_10.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Lower-case form of byte c, read from EncISO_8859_10_ToLowerCaseTable. */
#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c]
/* Non-zero when the ctype-table entry for byte `code` has the bit selected
 * by CTYPE_TO_BIT(ctype) set.  `code` indexes the table directly, so callers
 * must keep it below 256. */
#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
((EncISO_8859_10_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/* Byte-to-lower-case map (entries written in octal).  Bytes 0x41-0x5a and
 * 0xc0-0xde map to the byte 0x20 above them; 0xa1-0xa6, 0xa8-0xac and
 * 0xae-0xaf map to the byte 0x10 above them; every other byte maps to
 * itself. */
static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247',
'\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/* Character-class bit mask for each of the 256 ISO-8859-10 byte values,
 * tested through ENC_IS_ISO_8859_10_CTYPE.  The meaning of the individual
 * bits is defined by CTYPE_TO_BIT, which is declared outside this file. */
static const unsigned short EncISO_8859_10_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
/*
 * Case-fold the single byte at *pp into lower, advance *pp by one byte and
 * return the number of bytes written.
 *
 * 0xdf becomes the two bytes "ss" when flag contains
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR; every other byte (and 0xdf without
 * that flag) is mapped through ENC_ISO_8859_10_TO_LOWER_CASE.  The end
 * argument is not used.
 */
static int
mbc_case_fold(OnigCaseFoldType flag,
	      const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar ch = **pp;
  int written;

  if (ch == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_10_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written;
}
#if 0
/*
 * Disabled: everything between #if 0 and #endif is compiled out.
 *
 * Advances *pp by one byte and returns TRUE or FALSE depending on the
 * upper/lower bits that EncISO_8859_10_CtypeTable holds for that byte.
 * Byte 0xdf returns TRUE up front when `flag` has
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set.
 *
 * NOTE(review): `(v | BIT_CTYPE_LOWER) != 0` is true whenever
 * BIT_CTYPE_LOWER is nonzero, which would make the final return
 * unreachable; `&` was presumably intended. Confirm before re-enabling.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
/* keep only the upper/lower classification bits of the byte just consumed */
v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Report whether code point `code` belongs to character class `ctype`
 * according to EncISO_8859_10_CtypeTable.
 *
 * The table has one entry per byte value, so any code point of 256 or
 * above yields FALSE without a lookup.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_10_CTYPE(code, ctype) : FALSE;
}
/*
 * Byte pairs that are case variants of each other in ISO-8859-10.
 * The table is handed, together with its element count, to
 * onigenc_apply_all_case_fold_with_map() and
 * onigenc_get_case_fold_codes_by_str_with_map() by the wrappers below.
 * Presumably each entry is { upper, lower } -- confirm against the
 * OnigPairCaseFoldCodes declaration.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
/* 0xa1-0xaf paired with 0xb1-0xbf; 0xa7 and 0xad have no entry */
{ 0xa1, 0xb1 },
{ 0xa2, 0xb2 },
{ 0xa3, 0xb3 },
{ 0xa4, 0xb4 },
{ 0xa5, 0xb5 },
{ 0xa6, 0xb6 },
{ 0xa8, 0xb8 },
{ 0xa9, 0xb9 },
{ 0xaa, 0xba },
{ 0xab, 0xbb },
{ 0xac, 0xbc },
{ 0xae, 0xbe },
{ 0xaf, 0xbf },
/* 0xc0-0xde paired with 0xe0-0xfe (offset 0x20), no gaps */
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
/*
 * Forward to onigenc_apply_all_case_fold_with_map() using this file's
 * CaseFoldMap, passing `flag`, `f` and `arg` through unchanged and
 * returning that helper's result.
 *
 * The literal 1 is the helper's third argument; presumably it enables the
 * 0xdf / "ss" handling -- confirm against the helper's declaration.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  const int pair_count = (int )(sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]));

  return onigenc_apply_all_case_fold_with_map(pair_count, CaseFoldMap, 1,
                                              flag, f, arg);
}
/*
 * Forward to onigenc_get_case_fold_codes_by_str_with_map() using this
 * file's CaseFoldMap, passing `flag`, the byte range [p, end) and `items`
 * through unchanged and returning that helper's result.
 *
 * The literal 1 is the helper's third argument; presumably it enables the
 * 0xdf / "ss" handling -- confirm against the helper's declaration.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  const int pair_count = (int )(sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]));

  return onigenc_get_case_fold_codes_by_str_with_map(pair_count, CaseFoldMap, 1,
                                                     flag, p, end, items);
}
/*
 * Encoding descriptor for ISO-8859-10.
 * The initializer is positional: entries must stay in the member order of
 * OnigEncodingType (declared outside this file). Generic onigenc_* helpers
 * are used everywhere except the case-fold and ctype entries, which point
 * at the static functions defined in this file.
 */
OnigEncodingType OnigEncodingISO_8859_10 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-10", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold, /* file-local */
apply_all_case_fold, /* file-local */
get_case_fold_codes_by_str, /* file-local */
onigenc_minimum_property_name_to_ctype,
is_code_ctype, /* file-local */
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -0,0 +1,97 @@
/**********************************************************************
iso8859_11.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Nonzero when the table entry for byte value `code` has the bit that
 * CTYPE_TO_BIT() yields for `ctype`. `code` indexes a 256-entry table, so
 * callers must pass a value below 256.
 */
#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
  (0 != (EncISO_8859_11_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)))
/*
 * Character-class bit masks for ISO-8859-11, one entry per byte value
 * (index = byte, 8 entries per row). Entries are tested through
 * ENC_IS_ISO_8859_11_CTYPE(), i.e. against CTYPE_TO_BIT(ctype); the bit
 * layout itself is defined outside this file.
 */
static const unsigned short EncISO_8859_11_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff; 0xdb-0xde and 0xfc-0xff carry no class bits (0x0000) */
0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000
};
/*
 * Report whether code point `code` belongs to character class `ctype`
 * according to EncISO_8859_11_CtypeTable.
 *
 * The table has one entry per byte value, so any code point of 256 or
 * above yields FALSE without a lookup.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_11_CTYPE(code, ctype) : FALSE;
}
/*
 * Encoding descriptor for ISO-8859-11.
 * The initializer is positional: entries must stay in the member order of
 * OnigEncodingType (declared outside this file). This file defines no
 * case-fold table, so the three case-fold entries use the generic
 * onigenc_ascii_* helpers; only is_code_ctype is file-local.
 */
OnigEncodingType OnigEncodingISO_8859_11 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-11", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype, /* file-local */
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

229
src/Onigmo/enc/iso8859_13.c Normal file
View File

@ -0,0 +1,229 @@
/**********************************************************************
iso8859_13.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Folded form of byte `c`, read from the 256-entry lower-case table. */
#define ENC_ISO_8859_13_TO_LOWER_CASE(c) (EncISO_8859_13_ToLowerCaseTable[(c)])
/*
 * Nonzero when the table entry for byte value `code` has the bit that
 * CTYPE_TO_BIT() yields for `ctype`. `code` must be below 256.
 */
#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
  (0 != (EncISO_8859_13_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)))
/*
 * Case-folding table for ISO-8859-13: entry [b] is the byte that
 * mbc_case_fold() stores for input byte b (octal literals, 8 per row).
 * An entry equal to its own index means the byte is left unchanged.
 */
static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
/* 0x41-0x5a ('A'-'Z') fold to 0x61-0x7a ('a'-'z') */
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
/* 0xa8, 0xaa and 0xaf fold to 0xb8, 0xba and 0xbf */
'\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
/* 0xc0-0xde fold to 0xe0-0xfe, except 0xd7 which maps to itself */
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bit masks for ISO-8859-13, one entry per byte value
 * (index = byte, 8 entries per row). Entries are tested through
 * ENC_IS_ISO_8859_13_CTYPE(), i.e. against CTYPE_TO_BIT(ctype); the bit
 * layout itself is defined outside this file.
 */
static const unsigned short EncISO_8859_13_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff */
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0,
0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0
};
/*
 * Case-fold the single byte at *pp into the buffer `lower` and advance *pp
 * by one byte.
 *
 * Byte 0xdf is written as the two bytes 's','s' when `flag` has
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; every other byte (and 0xdf
 * without that flag) is looked up in EncISO_8859_13_ToLowerCaseTable.
 *
 * Returns the number of bytes stored in `lower` (1 or 2). `end` is unused.
 */
static int
mbc_case_fold(OnigCaseFoldType flag,
              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar* src = *pp;
  int written;

  if (*src != 0xdf || (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0) {
    /* Ordinary one-to-one fold through the table. */
    lower[0] = ENC_ISO_8859_13_TO_LOWER_CASE(*src);
    written = 1;
  }
  else {
    /* Multi-character fold: one input byte becomes "ss". */
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }

  (*pp)++;  /* exactly one input byte is consumed on either path */
  return written;
}
#if 0
/*
 * Disabled: everything between #if 0 and #endif is compiled out.
 *
 * Advances *pp by one byte and returns TRUE or FALSE depending on the
 * upper/lower bits that EncISO_8859_13_CtypeTable holds for that byte.
 * Byte 0xdf returns TRUE up front when `flag` has
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; byte 0xb5 returns FALSE.
 *
 * NOTE(review): `(v | BIT_CTYPE_LOWER) != 0` is true whenever
 * BIT_CTYPE_LOWER is nonzero, which would make the final return
 * unreachable; `&` was presumably intended. Confirm before re-enabling.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
/* keep only the upper/lower classification bits of the byte just consumed */
v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xdf and 0xb5 are lower-case letters that cannot be converted. */
if (*p == 0xb5)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Report whether code point `code` belongs to character class `ctype`
 * according to EncISO_8859_13_CtypeTable.
 *
 * The table has one entry per byte value, so any code point of 256 or
 * above yields FALSE without a lookup.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_13_CTYPE(code, ctype) : FALSE;
}
/*
 * Byte pairs that are case variants of each other in ISO-8859-13; in each
 * entry EncISO_8859_13_ToLowerCaseTable maps the first byte to the second.
 * The table is handed, together with its element count, to
 * onigenc_apply_all_case_fold_with_map() and
 * onigenc_get_case_fold_codes_by_str_with_map().
 *
 * The list must cover every byte the lower-case table changes outside the
 * ASCII range. The three 0xa8/0xaa/0xaf pairs were previously missing, so
 * the map-driven helpers disagreed with mbc_case_fold() for those bytes.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* letters placed in the 0xa0-0xbf block */
  { 0xa8, 0xb8 },
  { 0xaa, 0xba },
  { 0xaf, 0xbf },

  /* 0xc0-0xde paired with 0xe0-0xfe; 0xd7 maps to itself and has no pair */
  { 0xc0, 0xe0 },
  { 0xc1, 0xe1 },
  { 0xc2, 0xe2 },
  { 0xc3, 0xe3 },
  { 0xc4, 0xe4 },
  { 0xc5, 0xe5 },
  { 0xc6, 0xe6 },
  { 0xc7, 0xe7 },
  { 0xc8, 0xe8 },
  { 0xc9, 0xe9 },
  { 0xca, 0xea },
  { 0xcb, 0xeb },
  { 0xcc, 0xec },
  { 0xcd, 0xed },
  { 0xce, 0xee },
  { 0xcf, 0xef },
  { 0xd0, 0xf0 },
  { 0xd1, 0xf1 },
  { 0xd2, 0xf2 },
  { 0xd3, 0xf3 },
  { 0xd4, 0xf4 },
  { 0xd5, 0xf5 },
  { 0xd6, 0xf6 },
  { 0xd8, 0xf8 },
  { 0xd9, 0xf9 },
  { 0xda, 0xfa },
  { 0xdb, 0xfb },
  { 0xdc, 0xfc },
  { 0xdd, 0xfd },
  { 0xde, 0xfe }
};
/*
 * Forward to onigenc_apply_all_case_fold_with_map() using this file's
 * CaseFoldMap, passing `flag`, `f` and `arg` through unchanged and
 * returning that helper's result.
 *
 * The literal 1 is the helper's third argument; presumably it enables the
 * 0xdf / "ss" handling -- confirm against the helper's declaration.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  const int pair_count = (int )(sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]));

  return onigenc_apply_all_case_fold_with_map(pair_count, CaseFoldMap, 1,
                                              flag, f, arg);
}
/*
 * Forward to onigenc_get_case_fold_codes_by_str_with_map() using this
 * file's CaseFoldMap, passing `flag`, the byte range [p, end) and `items`
 * through unchanged and returning that helper's result.
 *
 * The literal 1 is the helper's third argument; presumably it enables the
 * 0xdf / "ss" handling -- confirm against the helper's declaration.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  const int pair_count = (int )(sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]));

  return onigenc_get_case_fold_codes_by_str_with_map(pair_count, CaseFoldMap, 1,
                                                     flag, p, end, items);
}
/*
 * Encoding descriptor for ISO-8859-13.
 * The initializer is positional: entries must stay in the member order of
 * OnigEncodingType (declared outside this file). Generic onigenc_* helpers
 * are used everywhere except the case-fold and ctype entries, which point
 * at the static functions defined in this file.
 */
OnigEncodingType OnigEncodingISO_8859_13 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-13", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold, /* file-local */
apply_all_case_fold, /* file-local */
get_case_fold_codes_by_str, /* file-local */
onigenc_minimum_property_name_to_ctype,
is_code_ctype, /* file-local */
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

242
src/Onigmo/enc/iso8859_14.c Normal file
View File

@ -0,0 +1,242 @@
/**********************************************************************
iso8859_14.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Folded form of byte `c`, read from the 256-entry lower-case table. */
#define ENC_ISO_8859_14_TO_LOWER_CASE(c) (EncISO_8859_14_ToLowerCaseTable[(c)])
/*
 * Nonzero when the table entry for byte value `code` has the bit that
 * CTYPE_TO_BIT() yields for `ctype`. `code` must be below 256.
 */
#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
  (0 != (EncISO_8859_14_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)))
/*
 * Case-folding table for ISO-8859-14: entry [b] is the byte that
 * mbc_case_fold() stores for input byte b (octal literals, 8 per row).
 * An entry equal to its own index means the byte is left unchanged.
 */
static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
/* 0x41-0x5a ('A'-'Z') fold to 0x61-0x7a ('a'-'z') */
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
/* 0xa0-0xbf: irregular pairs, not a fixed offset */
'\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247',
'\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377',
'\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271',
'\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277',
/* 0xc0-0xde fold to 0xe0-0xfe; 0xdf maps to itself */
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bit masks for ISO-8859-14, one entry per byte value
 * (index = byte, 8 entries per row). Entries are tested through
 * ENC_IS_ISO_8859_14_CTYPE(), i.e. against CTYPE_TO_BIT(ctype); the bit
 * layout itself is defined outside this file.
 */
static const unsigned short EncISO_8859_14_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff */
0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0,
0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2,
0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2,
0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
/*
 * Case-fold the single byte at *pp into the buffer `lower` and advance *pp
 * by one byte.
 *
 * Byte 0xdf is written as the two bytes 's','s' when `flag` has
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; every other byte (and 0xdf
 * without that flag) is looked up in EncISO_8859_14_ToLowerCaseTable.
 *
 * Returns the number of bytes stored in `lower` (1 or 2). `end` is unused.
 */
static int
mbc_case_fold(OnigCaseFoldType flag,
              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar* src = *pp;
  int written;

  if (*src != 0xdf || (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0) {
    /* Ordinary one-to-one fold through the table. */
    lower[0] = ENC_ISO_8859_14_TO_LOWER_CASE(*src);
    written = 1;
  }
  else {
    /* Multi-character fold: one input byte becomes "ss". */
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }

  (*pp)++;  /* exactly one input byte is consumed on either path */
  return written;
}
#if 0
/*
 * Disabled: everything between #if 0 and #endif is compiled out.
 *
 * Advances *pp by one byte and returns TRUE or FALSE depending on the
 * upper/lower bits that EncISO_8859_14_CtypeTable holds for that byte.
 * Byte 0xdf returns TRUE up front when `flag` has
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set.
 *
 * NOTE(review): `(v | BIT_CTYPE_LOWER) != 0` is true whenever
 * BIT_CTYPE_LOWER is nonzero, which would make the final return
 * unreachable; `&` was presumably intended. Confirm before re-enabling.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
/* keep only the upper/lower classification bits of the byte just consumed */
v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Report whether code point `code` belongs to character class `ctype`
 * according to EncISO_8859_14_CtypeTable.
 *
 * The table has one entry per byte value, so any code point of 256 or
 * above yields FALSE without a lookup.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_14_CTYPE(code, ctype) : FALSE;
}
/*
 * Byte pairs that are case variants of each other in ISO-8859-14; in each
 * entry EncISO_8859_14_ToLowerCaseTable maps the first byte to the second.
 * The table is handed, together with its element count, to
 * onigenc_apply_all_case_fold_with_map() and
 * onigenc_get_case_fold_codes_by_str_with_map() by the wrappers below.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
/* irregular pairs in the 0xa0-0xbf block (plus 0xaf <-> 0xff) */
{ 0xa1, 0xa2 },
{ 0xa4, 0xa5 },
{ 0xa6, 0xab },
{ 0xa8, 0xb8 },
{ 0xaa, 0xba },
{ 0xac, 0xbc },
{ 0xaf, 0xff },
{ 0xb0, 0xb1 },
{ 0xb2, 0xb3 },
{ 0xb4, 0xb5 },
{ 0xb7, 0xb9 },
{ 0xbb, 0xbf },
{ 0xbd, 0xbe },
/* 0xc0-0xde paired with 0xe0-0xfe (offset 0x20), no gaps */
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
/*
 * Forward to onigenc_apply_all_case_fold_with_map() using this file's
 * CaseFoldMap, passing `flag`, `f` and `arg` through unchanged and
 * returning that helper's result.
 *
 * The literal 1 is the helper's third argument; presumably it enables the
 * 0xdf / "ss" handling -- confirm against the helper's declaration.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  const int pair_count = (int )(sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]));

  return onigenc_apply_all_case_fold_with_map(pair_count, CaseFoldMap, 1,
                                              flag, f, arg);
}
/*
 * Forward to onigenc_get_case_fold_codes_by_str_with_map() using this
 * file's CaseFoldMap, passing `flag`, the byte range [p, end) and `items`
 * through unchanged and returning that helper's result.
 *
 * The literal 1 is the helper's third argument; presumably it enables the
 * 0xdf / "ss" handling -- confirm against the helper's declaration.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  const int pair_count = (int )(sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]));

  return onigenc_get_case_fold_codes_by_str_with_map(pair_count, CaseFoldMap, 1,
                                                     flag, p, end, items);
}
/*
 * Encoding descriptor for ISO-8859-14.
 * The initializer is positional: entries must stay in the member order of
 * OnigEncodingType (declared outside this file). Generic onigenc_* helpers
 * are used everywhere except the case-fold and ctype entries, which point
 * at the static functions defined in this file.
 */
OnigEncodingType OnigEncodingISO_8859_14 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-14", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold, /* file-local */
apply_all_case_fold, /* file-local */
get_case_fold_codes_by_str, /* file-local */
onigenc_minimum_property_name_to_ctype,
is_code_ctype, /* file-local */
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

236
src/Onigmo/enc/iso8859_15.c Normal file
View File

@ -0,0 +1,236 @@
/**********************************************************************
iso8859_15.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Folded form of byte `c`, read from the 256-entry lower-case table. */
#define ENC_ISO_8859_15_TO_LOWER_CASE(c) (EncISO_8859_15_ToLowerCaseTable[(c)])
/*
 * Nonzero when the table entry for byte value `code` has the bit that
 * CTYPE_TO_BIT() yields for `ctype`. `code` must be below 256.
 */
#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
  (0 != (EncISO_8859_15_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)))
/*
 * Case-folding table for ISO-8859-15: entry [b] is the byte that
 * mbc_case_fold() stores for input byte b (octal literals, 8 per row).
 * An entry equal to its own index means the byte is left unchanged.
 */
static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
/* 0x41-0x5a ('A'-'Z') fold to 0x61-0x7a ('a'-'z') */
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
/* 0xa6->0xa8, 0xb4->0xb8, 0xbc->0xbd, 0xbe->0xff; rest of 0xa0-0xbf unchanged */
'\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
/* 0xc0-0xde fold to 0xe0-0xfe, except 0xd7 which maps to itself */
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bit masks for ISO-8859-15, one entry per byte value
 * (index = byte, 8 entries per row). Entries are tested through
 * ENC_IS_ISO_8859_15_CTYPE(), i.e. against CTYPE_TO_BIT(ctype); the bit
 * layout itself is defined outside this file.
 */
static const unsigned short EncISO_8859_15_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff */
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0,
0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0,
0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
/*
 * Case-fold the single byte at *pp into the buffer `lower` and advance *pp
 * by one byte.
 *
 * Byte 0xdf is written as the two bytes 's','s' when `flag` has
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; every other byte (and 0xdf
 * without that flag) is looked up in EncISO_8859_15_ToLowerCaseTable.
 *
 * Returns the number of bytes stored in `lower` (1 or 2). `end` is unused.
 */
static int
mbc_case_fold(OnigCaseFoldType flag,
              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar* src = *pp;
  int written;

  if (*src != 0xdf || (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0) {
    /* Ordinary one-to-one fold through the table. */
    lower[0] = ENC_ISO_8859_15_TO_LOWER_CASE(*src);
    written = 1;
  }
  else {
    /* Multi-character fold: one input byte becomes "ss". */
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }

  (*pp)++;  /* exactly one input byte is consumed on either path */
  return written;
}
#if 0
/*
 * Disabled: everything between #if 0 and #endif is compiled out.
 *
 * Advances *pp by one byte and returns TRUE or FALSE depending on the
 * upper/lower bits that EncISO_8859_15_CtypeTable holds for that byte.
 * Byte 0xdf returns TRUE up front when `flag` has
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; bytes 0xaa, 0xb5 and 0xba
 * return FALSE.
 *
 * NOTE(review): `(v | BIT_CTYPE_LOWER) != 0` is true whenever
 * BIT_CTYPE_LOWER is nonzero, which would make the final return
 * unreachable; `&` was presumably intended. Confirm before re-enabling.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
/* keep only the upper/lower classification bits of the byte just consumed */
v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xdf and the bytes tested below are lower-case letters that cannot be converted. */
if (*p == 0xaa || *p == 0xb5 || *p == 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Report whether code point `code` belongs to character class `ctype`
 * according to EncISO_8859_15_CtypeTable.
 *
 * The table has one entry per byte value, so any code point of 256 or
 * above yields FALSE without a lookup.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_15_CTYPE(code, ctype) : FALSE;
}
/*
 * Byte pairs that are case variants of each other in ISO-8859-15; in each
 * entry EncISO_8859_15_ToLowerCaseTable maps the first byte to the second.
 * The table is handed, together with its element count, to
 * onigenc_apply_all_case_fold_with_map() and
 * onigenc_get_case_fold_codes_by_str_with_map() by the wrappers below.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
/* irregular pairs in the 0xa0-0xbf block (plus 0xbe <-> 0xff) */
{ 0xa6, 0xa8 },
{ 0xb4, 0xb8 },
{ 0xbc, 0xbd },
{ 0xbe, 0xff },
/* 0xc0-0xde paired with 0xe0-0xfe; 0xd7 maps to itself and has no pair */
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
/*
 * Delegates to onigenc_apply_all_case_fold_with_map, supplying this
 * file's CaseFoldMap together with its element count, and returns that
 * helper's result unchanged.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  return onigenc_apply_all_case_fold_with_map(
           sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),  /* number of pairs */
           CaseFoldMap,
           1,  /* NOTE(review): presumably turns on the helper's sharp-s handling -- confirm in regenc */
           flag, f, arg);
}
/*
 * Delegates to onigenc_get_case_fold_codes_by_str_with_map, supplying
 * this file's CaseFoldMap together with its element count, and returns
 * that helper's result unchanged.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
                           const OnigUChar* p, const OnigUChar* end,
                           OnigCaseFoldCodeItem items[])
{
  return onigenc_get_case_fold_codes_by_str_with_map(
           sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),  /* number of pairs */
           CaseFoldMap,
           1,  /* NOTE(review): presumably turns on the helper's sharp-s handling -- confirm in regenc */
           flag, p, end, items);
}
/*
 * Encoding descriptor for ISO-8859-15.  It combines the generic
 * single-byte helpers (onigenc_single_byte_*) with the case-folding and
 * ctype callbacks defined in this file.  The initializer is positional,
 * so the order of the entries must not be changed.
 */
OnigEncodingType OnigEncodingISO_8859_15 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-15", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold, /* defined in this file */
apply_all_case_fold, /* defined in this file */
get_case_fold_codes_by_str, /* defined in this file */
onigenc_minimum_property_name_to_ctype,
is_code_ctype, /* defined in this file */
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

238
src/Onigmo/enc/iso8859_16.c Normal file
View File

@ -0,0 +1,238 @@
/**********************************************************************
iso8859_16.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Lower-case table entry for byte value c. */
#define ENC_ISO_8859_16_TO_LOWER_CASE(c) \
  (EncISO_8859_16_ToLowerCaseTable[(c)])
/* Non-zero when the ctype-table entry for `code` has the bit of class
   `ctype` set. */
#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
  (0 != (EncISO_8859_16_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)))
/*
 * Lower-case mapping for ISO-8859-16, indexed by byte value: entry i is
 * the byte that mbc_case_fold() stores for input byte i.  Bytes without a
 * lower-case counterpart map to themselves.
 *
 * Entry 0xa4 (the euro sign, which has no case) maps to itself; it must
 * not fold to 0xa5, which is an unrelated punctuation mark.
 */
static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\242', '\242', '\263', '\244', '\245', '\250', '\247',
'\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277',
'\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bitmask for each ISO-8859-16 byte value, indexed by
 * the byte.  ENC_IS_ISO_8859_16_CTYPE() tests an entry against
 * CTYPE_TO_BIT(ctype).
 */
static const unsigned short EncISO_8859_16_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0,
0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2,
0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0,
0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
/*
 * Case-fold the single byte at *pp into `lower` and step *pp past it.
 *
 * When multi-character folding is enabled in `flag`, byte 0xdf (sharp s)
 * expands to the two bytes "ss"; every other byte is translated through
 * the ISO-8859-16 lower-case table.
 *
 * Returns the number of bytes stored in `lower` (1 or 2).
 */
static int
mbc_case_fold(OnigCaseFoldType flag,
              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar c = **pp;

  (*pp)++;  /* exactly one input byte is consumed on every path */

  if (c != 0xdf || (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0) {
    lower[0] = ENC_ISO_8859_16_TO_LOWER_CASE(c);
    return 1;
  }

  lower[0] = 's';
  lower[1] = 's';
  return 2;
}
#if 0
/*
 * Compiled out by the enclosing #if 0; kept for reference only.
 *
 * Consumes one byte from *pp and reports whether it is case-ambiguous:
 * TRUE for 0xdf when multi-character folding is enabled, TRUE for bytes
 * classed as upper or lower case in the ctype table, FALSE otherwise.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Test the LOWER bit with '&'; '|' made this condition always true and
     left the final return unreachable. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    return TRUE;
  }

  /* Only upper-case bytes can still have a bit set here. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Report whether code point `code` belongs to character class `ctype`.
 * Codes of 256 and above are never members; smaller codes are looked up
 * in the ISO-8859-16 ctype table.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_16_CTYPE(code, ctype) : FALSE;
}
/*
 * Case-fold pairs for ISO-8859-16, passed to the map-driven helpers by
 * apply_all_case_fold() and get_case_fold_codes_by_str() below.
 * Each entry pairs the byte of an upper-case letter with the byte that
 * EncISO_8859_16_ToLowerCaseTable maps it to.  0xdf has no entry; it is
 * handled separately in mbc_case_fold().
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xa2 },
{ 0xa3, 0xb3 },
{ 0xa6, 0xa8 },
{ 0xaa, 0xba },
{ 0xac, 0xae },
{ 0xaf, 0xbf },
{ 0xb2, 0xb9 },
{ 0xb4, 0xb8 },
{ 0xbc, 0xbd },
{ 0xbe, 0xff },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
/*
 * Delegates to onigenc_apply_all_case_fold_with_map, supplying this
 * file's CaseFoldMap together with its element count, and returns that
 * helper's result unchanged.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  return onigenc_apply_all_case_fold_with_map(
           sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),  /* number of pairs */
           CaseFoldMap,
           1,  /* NOTE(review): presumably turns on the helper's sharp-s handling -- confirm in regenc */
           flag, f, arg);
}
/*
 * Delegates to onigenc_get_case_fold_codes_by_str_with_map, supplying
 * this file's CaseFoldMap together with its element count, and returns
 * that helper's result unchanged.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
                           const OnigUChar* p, const OnigUChar* end,
                           OnigCaseFoldCodeItem items[])
{
  return onigenc_get_case_fold_codes_by_str_with_map(
           sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),  /* number of pairs */
           CaseFoldMap,
           1,  /* NOTE(review): presumably turns on the helper's sharp-s handling -- confirm in regenc */
           flag, p, end, items);
}
/*
 * Encoding descriptor for ISO-8859-16.  It combines the generic
 * single-byte helpers (onigenc_single_byte_*) with the case-folding and
 * ctype callbacks defined in this file.  The initializer is positional,
 * so the order of the entries must not be changed.
 */
OnigEncodingType OnigEncodingISO_8859_16 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-16", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold, /* defined in this file */
apply_all_case_fold, /* defined in this file */
get_case_fold_codes_by_str, /* defined in this file */
onigenc_minimum_property_name_to_ctype,
is_code_ctype, /* defined in this file */
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

236
src/Onigmo/enc/iso8859_2.c Normal file
View File

@ -0,0 +1,236 @@
/**********************************************************************
iso8859_2.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Lower-case table entry for byte value c. */
#define ENC_ISO_8859_2_TO_LOWER_CASE(c) \
  (EncISO_8859_2_ToLowerCaseTable[(c)])
/* Non-zero when the ctype-table entry for `code` has the bit of class
   `ctype` set. */
#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
  (0 != (EncISO_8859_2_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)))
/*
 * Lower-case mapping for ISO-8859-2, indexed by byte value: entry i is
 * the byte that mbc_case_fold() stores for input byte i.  Bytes without a
 * lower-case counterpart map to themselves.
 */
static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
'\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bitmask for each ISO-8859-2 byte value, indexed by
 * the byte.  ENC_IS_ISO_8859_2_CTYPE() tests an entry against
 * CTYPE_TO_BIT(ctype).
 */
static const unsigned short EncISO_8859_2_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0,
0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
};
/*
 * Case-fold the single byte at *pp into `lower` and step *pp past it.
 *
 * When multi-character folding is enabled in `flag`, byte 0xdf (sharp s)
 * expands to the two bytes "ss"; every other byte is translated through
 * the ISO-8859-2 lower-case table.
 *
 * Returns the number of bytes stored in `lower` (1 or 2).
 */
static int
mbc_case_fold(OnigCaseFoldType flag,
              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar c = **pp;

  (*pp)++;  /* exactly one input byte is consumed on every path */

  if (c != 0xdf || (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0) {
    lower[0] = ENC_ISO_8859_2_TO_LOWER_CASE(c);
    return 1;
  }

  lower[0] = 's';
  lower[1] = 's';
  return 2;
}
#if 0
/*
 * Compiled out by the enclosing #if 0; kept for reference only.
 *
 * Consumes one byte from *pp and reports whether it is case-ambiguous:
 * TRUE for 0xdf when multi-character folding is enabled, TRUE for bytes
 * classed as upper or lower case in the ctype table, FALSE otherwise.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Test the LOWER bit with '&'; '|' made this condition always true and
     left the final return unreachable. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    return TRUE;
  }

  /* Only upper-case bytes can still have a bit set here. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Case-fold pairs for ISO-8859-2, passed to the map-driven helpers by
 * apply_all_case_fold() and get_case_fold_codes_by_str() below.
 * Each entry pairs the byte of an upper-case letter with the byte that
 * EncISO_8859_2_ToLowerCaseTable maps it to.  0xd7 and 0xdf have no
 * entry; 0xdf is handled separately in mbc_case_fold().
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
{ 0xa5, 0xb5 },
{ 0xa6, 0xb6 },
{ 0xa9, 0xb9 },
{ 0xaa, 0xba },
{ 0xab, 0xbb },
{ 0xac, 0xbc },
{ 0xae, 0xbe },
{ 0xaf, 0xbf },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
/*
 * Delegates to onigenc_apply_all_case_fold_with_map, supplying this
 * file's CaseFoldMap together with its element count, and returns that
 * helper's result unchanged.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  return onigenc_apply_all_case_fold_with_map(
           sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),  /* number of pairs */
           CaseFoldMap,
           1,  /* NOTE(review): presumably turns on the helper's sharp-s handling -- confirm in regenc */
           flag, f, arg);
}
/*
 * Delegates to onigenc_get_case_fold_codes_by_str_with_map, supplying
 * this file's CaseFoldMap together with its element count, and returns
 * that helper's result unchanged.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
                           const OnigUChar* p, const OnigUChar* end,
                           OnigCaseFoldCodeItem items[])
{
  return onigenc_get_case_fold_codes_by_str_with_map(
           sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),  /* number of pairs */
           CaseFoldMap,
           1,  /* NOTE(review): presumably turns on the helper's sharp-s handling -- confirm in regenc */
           flag, p, end, items);
}
/*
 * Report whether code point `code` belongs to character class `ctype`.
 * Codes of 256 and above are never members; smaller codes are looked up
 * in the ISO-8859-2 ctype table.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_2_CTYPE(code, ctype) : FALSE;
}
/*
 * Encoding descriptor for ISO-8859-2.  It combines the generic
 * single-byte helpers (onigenc_single_byte_*) with the case-folding and
 * ctype callbacks defined in this file.  The initializer is positional,
 * so the order of the entries must not be changed.
 */
OnigEncodingType OnigEncodingISO_8859_2 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-2", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold, /* defined in this file */
apply_all_case_fold, /* defined in this file */
get_case_fold_codes_by_str, /* defined in this file */
onigenc_minimum_property_name_to_ctype,
is_code_ctype, /* defined in this file */
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

236
src/Onigmo/enc/iso8859_3.c Normal file
View File

@ -0,0 +1,236 @@
/**********************************************************************
iso8859_3.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Lower-case table entry for byte value c. */
#define ENC_ISO_8859_3_TO_LOWER_CASE(c) \
  (EncISO_8859_3_ToLowerCaseTable[(c)])
/* Non-zero when the ctype-table entry for `code` has the bit of class
   `ctype` set. */
#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \
  (0 != (EncISO_8859_3_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)))
/*
 * Lower-case mapping for ISO-8859-3, indexed by byte value: entry i is
 * the byte that mbc_case_fold() stores for input byte i.  Bytes without a
 * lower-case counterpart map to themselves.
 */
static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247',
'\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bitmask for each ISO-8859-3 byte value, indexed by
 * the byte.  ENC_IS_ISO_8859_3_CTYPE() tests an entry against
 * CTYPE_TO_BIT(ctype).  Entries of 0x0000 belong to no class at all.
 */
static const unsigned short EncISO_8859_3_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0,
0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2,
0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
};
/*
 * Case-fold the single byte at *pp into `lower` and step *pp past it.
 *
 * When multi-character folding is enabled in `flag`, byte 0xdf (sharp s)
 * expands to the two bytes "ss"; every other byte is translated through
 * the ISO-8859-3 lower-case table.
 *
 * Returns the number of bytes stored in `lower` (1 or 2).
 */
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
              const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar c = **pp;

  (*pp)++;  /* exactly one input byte is consumed on every path */

  if (c != 0xdf || (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0) {
    lower[0] = ENC_ISO_8859_3_TO_LOWER_CASE(c);
    return 1;
  }

  lower[0] = 's';
  lower[1] = 's';
  return 2;
}
#if 0
/*
 * Compiled out by the enclosing #if 0; kept for reference only.
 *
 * Consumes one byte from *pp and reports whether it is case-ambiguous:
 * TRUE for 0xdf when multi-character folding is enabled, TRUE for bytes
 * classed as upper or lower case in the ctype table (except 0xb5, see
 * below), FALSE otherwise.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Test the LOWER bit with '&'; '|' made this condition always true and
     left the final return unreachable. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* 0xb5 is classed as lower case in the ctype table but has no partner
       in CaseFoldMap, so it can't be converted. */
    if (*p == 0xb5)
      return FALSE;
    else
      return TRUE;
  }

  /* Only upper-case bytes can still have a bit set here. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Report whether code point `code` belongs to character class `ctype`.
 * Codes of 256 and above are never members; smaller codes are looked up
 * in the ISO-8859-3 ctype table.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_3_CTYPE(code, ctype) : FALSE;
}
/*
 * Case-fold pairs for ISO-8859-3, passed to the map-driven helpers by
 * apply_all_case_fold() and get_case_fold_codes_by_str() below.
 * Each entry pairs the byte of an upper-case letter with the byte that
 * EncISO_8859_3_ToLowerCaseTable maps it to.  0xc3, 0xd0, 0xd7 and 0xdf
 * have no entry; 0xdf is handled separately in mbc_case_fold().
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xb1 },
{ 0xa6, 0xb6 },
{ 0xa9, 0xb9 },
{ 0xaa, 0xba },
{ 0xab, 0xbb },
{ 0xac, 0xbc },
{ 0xaf, 0xbf },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
/*
 * Delegates to onigenc_apply_all_case_fold_with_map, supplying this
 * file's CaseFoldMap together with its element count, and returns that
 * helper's result unchanged.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  return onigenc_apply_all_case_fold_with_map(
           sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),  /* number of pairs */
           CaseFoldMap,
           1,  /* NOTE(review): presumably turns on the helper's sharp-s handling -- confirm in regenc */
           flag, f, arg);
}
/*
 * Delegates to onigenc_get_case_fold_codes_by_str_with_map, supplying
 * this file's CaseFoldMap together with its element count, and returns
 * that helper's result unchanged.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
                           const OnigUChar* p, const OnigUChar* end,
                           OnigCaseFoldCodeItem items[])
{
  return onigenc_get_case_fold_codes_by_str_with_map(
           sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),  /* number of pairs */
           CaseFoldMap,
           1,  /* NOTE(review): presumably turns on the helper's sharp-s handling -- confirm in regenc */
           flag, p, end, items);
}
/*
 * Encoding descriptor for ISO-8859-3.  It combines the generic
 * single-byte helpers (onigenc_single_byte_*) with the case-folding and
 * ctype callbacks defined in this file.  The initializer is positional,
 * so the order of the entries must not be changed.
 */
OnigEncodingType OnigEncodingISO_8859_3 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-3", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold, /* defined in this file */
apply_all_case_fold, /* defined in this file */
get_case_fold_codes_by_str, /* defined in this file */
onigenc_minimum_property_name_to_ctype,
is_code_ctype, /* defined in this file */
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

238
src/Onigmo/enc/iso8859_4.c Normal file
View File

@ -0,0 +1,238 @@
/**********************************************************************
iso8859_4.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Lower-case table entry for byte value c. */
#define ENC_ISO_8859_4_TO_LOWER_CASE(c) \
  (EncISO_8859_4_ToLowerCaseTable[(c)])
/* Non-zero when the ctype-table entry for `code` has the bit of class
   `ctype` set. */
#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
  (0 != (EncISO_8859_4_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)))
/*
 * Lower-case mapping for ISO-8859-4, indexed by byte value and read
 * through ENC_ISO_8859_4_TO_LOWER_CASE().  Bytes without a lower-case
 * counterpart map to themselves.
 */
static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
'\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bit sets for ISO-8859-4, one entry per byte value
 * (8 entries per row, so row r starts at byte 8*r).
 * is_mbc_ambiguous() masks an entry with BIT_CTYPE_UPPER | BIT_CTYPE_LOWER;
 * is_code_ctype() reaches it through ENC_IS_ISO_8859_4_CTYPE (macro defined
 * outside this view -- presumably a CTYPE_TO_BIT(ctype) mask test).
 */
static const unsigned short EncISO_8859_4_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff */
0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0,
0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0,
0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
};
/*
 * Case-fold the single byte at *pp into `lower` and step *pp past it.
 *
 * Byte 0xdf is written as the two bytes "ss" when
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`; every other byte
 * (and 0xdf without that flag) goes through ENC_ISO_8859_4_TO_LOWER_CASE.
 *
 * Returns the number of bytes stored in `lower` (1 or 2).
 */
static int
mbc_case_fold(OnigCaseFoldType flag,
              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar ch = **pp;
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;

  /* exactly one input byte is consumed on every path */
  (*pp)++;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    return 2;
  }

  lower[0] = ENC_ISO_8859_4_TO_LOWER_CASE(ch);
  return 1; /* return byte length of converted char to lower */
}
#if 0
/*
 * Disabled (compiled out by the surrounding #if 0).
 *
 * Reports whether the byte at *pp is case-ambiguous and steps *pp past it.
 * 0xdf counts as ambiguous when INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is
 * set.  Otherwise a byte is ambiguous when its ctype entry carries the
 * upper or lower bit, except 0xa2, which is flagged lower but has no entry
 * in CaseFoldMap.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Was `v | BIT_CTYPE_LOWER`, which is non-zero for every byte and made
     the final return unreachable; the lower bit has to be tested with &. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    if (*p == 0xa2)
      return FALSE;
    else
      return TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Tell whether code point `code` belongs to character class `ctype`.
 * Code points of 256 and above are outside the table and belong to no class.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 256)
    return FALSE;

  return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
}
/*
 * Case-fold pairs {from, to} for ISO-8859-4, handed to the
 * onigenc_*_with_map() helpers by apply_all_case_fold() and
 * get_case_fold_codes_by_str().
 *
 * Each pair corresponds to a non-identity entry of
 * EncISO_8859_4_ToLowerCaseTable above 0x7f (table[from] == to).
 * The { 0xbd, 0xbf } pair was missing even though the lower-case table folds
 * 0xbd to 0xbf, so the map-driven case-insensitive expansion never related
 * those two bytes.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  { 0xa1, 0xb1 },
  { 0xa3, 0xb3 },
  { 0xa5, 0xb5 },
  { 0xa6, 0xb6 },
  { 0xa9, 0xb9 },
  { 0xaa, 0xba },
  { 0xab, 0xbb },
  { 0xac, 0xbc },
  { 0xae, 0xbe },
  { 0xbd, 0xbf },

  { 0xc0, 0xe0 },
  { 0xc1, 0xe1 },
  { 0xc2, 0xe2 },
  { 0xc3, 0xe3 },
  { 0xc4, 0xe4 },
  { 0xc5, 0xe5 },
  { 0xc6, 0xe6 },
  { 0xc7, 0xe7 },
  { 0xc8, 0xe8 },
  { 0xc9, 0xe9 },
  { 0xca, 0xea },
  { 0xcb, 0xeb },
  { 0xcc, 0xec },
  { 0xcd, 0xed },
  { 0xce, 0xee },
  { 0xcf, 0xef },

  { 0xd0, 0xf0 },
  { 0xd1, 0xf1 },
  { 0xd2, 0xf2 },
  { 0xd3, 0xf3 },
  { 0xd4, 0xf4 },
  { 0xd5, 0xf5 },
  { 0xd6, 0xf6 },
  { 0xd8, 0xf8 },
  { 0xd9, 0xf9 },
  { 0xda, 0xfa },
  { 0xdb, 0xfb },
  { 0xdc, 0xfc },
  { 0xdd, 0xfd },
  { 0xde, 0xfe }
};
/*
 * Hand this encoding's CaseFoldMap, together with `flag`, callback `f` and
 * its `arg`, to onigenc_apply_all_case_fold_with_map() and return its result.
 * NOTE(review): the third argument (1 here) is presumably the helper's
 * sharp-s switch -- confirm against the declaration in regenc.h.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  int status;

  status = onigenc_apply_all_case_fold_with_map(
             sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
             CaseFoldMap,
             1,
             flag, f, arg);
  return status;
}
/*
 * Forward the string [p, end) and the output array `items` to
 * onigenc_get_case_fold_codes_by_str_with_map() using this encoding's
 * CaseFoldMap, and return whatever the helper returns.
 * NOTE(review): the third argument (1 here) is presumably the helper's
 * sharp-s switch -- confirm against the declaration in regenc.h.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
                           const OnigUChar* p, const OnigUChar* end,
                           OnigCaseFoldCodeItem items[])
{
  int count;

  count = onigenc_get_case_fold_codes_by_str_with_map(
            sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
            CaseFoldMap,
            1,
            flag, p, end, items);
  return count;
}
/*
 * Encoding descriptor for ISO-8859-4.
 * Every character is one byte (max and min length are both 1).  Case folding
 * and ctype lookup use this file's mbc_case_fold, apply_all_case_fold,
 * get_case_fold_codes_by_str and is_code_ctype; the remaining slots are
 * filled with shared onigenc_* helpers.  Initializers are positional, so
 * their order must follow the member order of OnigEncodingType.
 */
OnigEncodingType OnigEncodingISO_8859_4 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-4", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

227
src/Onigmo/enc/iso8859_5.c Normal file
View File

@ -0,0 +1,227 @@
/**********************************************************************
iso8859_5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Lower-case form of byte c, read from EncISO_8859_5_ToLowerCaseTable. */
#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c]
/* Non-zero when the ctype entry for `code` has the CTYPE_TO_BIT(ctype) bit
   set.  `code` indexes a 256-entry table, so callers must keep it < 256. */
#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
((EncISO_8859_5_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/*
 * Byte-to-byte case-folding table for ISO-8859-5, indexed by byte value
 * (8 entries per row, written as octal character constants).
 * Entries that differ from their own index: 'A'-'Z' -> 'a'-'z',
 * 0xa1-0xac and 0xae-0xaf -> +0x50, and 0xb0-0xcf -> +0x20.
 * Every other byte maps to itself.
 */
static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
/* 0x00 - 0x7f */
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
/* 0x80 - 0x9f */
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
/* 0xa0 - 0xff */
'\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bit sets for ISO-8859-5, one entry per byte value
 * (8 entries per row, so row r starts at byte 8*r).
 * ENC_IS_ISO_8859_5_CTYPE tests an entry against CTYPE_TO_BIT(ctype).
 */
static const unsigned short EncISO_8859_5_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff */
0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2
};
/*
 * Case-fold the single byte at *pp through ENC_ISO_8859_5_TO_LOWER_CASE,
 * store it in *lower and step *pp past it.  Always produces one byte and
 * returns 1; `flag` and `end` are not consulted.
 */
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar ch = **pp;

  lower[0] = ENC_ISO_8859_5_TO_LOWER_CASE(ch);
  *pp += 1;
  return 1;
}
#if 0
/*
 * Disabled (compiled out by the surrounding #if 0).
 * Steps *pp past one byte and returns TRUE when that byte's ctype entry has
 * BIT_CTYPE_UPPER or BIT_CTYPE_LOWER set, FALSE otherwise.
 * `flag` and `end` are not used.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
(*pp)++;
/* p still points at the byte that was just consumed */
v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Tell whether code point `code` belongs to character class `ctype`.
 * Code points of 256 and above are outside the table and belong to no class.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 256)
    return FALSE;

  return ENC_IS_ISO_8859_5_CTYPE(code, ctype);
}
/*
 * Case-fold pairs {from, to} for ISO-8859-5, handed to the
 * onigenc_*_with_map() helpers by apply_all_case_fold() and
 * get_case_fold_codes_by_str().
 * Each pair repeats a non-identity entry of EncISO_8859_5_ToLowerCaseTable
 * above 0x7f (table[from] == to).  0xad is absent because it maps to itself.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
/* 0xa1-0xaf (without 0xad) -> 0xf1-0xff */
{ 0xa1, 0xf1 },
{ 0xa2, 0xf2 },
{ 0xa3, 0xf3 },
{ 0xa4, 0xf4 },
{ 0xa5, 0xf5 },
{ 0xa6, 0xf6 },
{ 0xa7, 0xf7 },
{ 0xa8, 0xf8 },
{ 0xa9, 0xf9 },
{ 0xaa, 0xfa },
{ 0xab, 0xfb },
{ 0xac, 0xfc },
{ 0xae, 0xfe },
{ 0xaf, 0xff },
/* 0xb0-0xbf -> 0xd0-0xdf */
{ 0xb0, 0xd0 },
{ 0xb1, 0xd1 },
{ 0xb2, 0xd2 },
{ 0xb3, 0xd3 },
{ 0xb4, 0xd4 },
{ 0xb5, 0xd5 },
{ 0xb6, 0xd6 },
{ 0xb7, 0xd7 },
{ 0xb8, 0xd8 },
{ 0xb9, 0xd9 },
{ 0xba, 0xda },
{ 0xbb, 0xdb },
{ 0xbc, 0xdc },
{ 0xbd, 0xdd },
{ 0xbe, 0xde },
{ 0xbf, 0xdf },
/* 0xc0-0xcf -> 0xe0-0xef */
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef }
};
/*
 * Hand this encoding's CaseFoldMap, together with `flag`, callback `f` and
 * its `arg`, to onigenc_apply_all_case_fold_with_map() and return its result.
 * NOTE(review): the third argument (0 here) is presumably the helper's
 * sharp-s switch -- confirm against the declaration in regenc.h.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  int status;

  status = onigenc_apply_all_case_fold_with_map(
             sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
             CaseFoldMap,
             0,
             flag, f, arg);
  return status;
}
/*
 * Forward the string [p, end) and the output array `items` to
 * onigenc_get_case_fold_codes_by_str_with_map() using this encoding's
 * CaseFoldMap, and return whatever the helper returns.
 * NOTE(review): the third argument (0 here) is presumably the helper's
 * sharp-s switch -- confirm against the declaration in regenc.h.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
                           const OnigUChar* p, const OnigUChar* end,
                           OnigCaseFoldCodeItem items[])
{
  int count;

  count = onigenc_get_case_fold_codes_by_str_with_map(
            sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
            CaseFoldMap,
            0,
            flag, p, end, items);
  return count;
}
/*
 * Encoding descriptor for ISO-8859-5.
 * Every character is one byte (max and min length are both 1).  Case folding
 * and ctype lookup use this file's mbc_case_fold, apply_all_case_fold,
 * get_case_fold_codes_by_str and is_code_ctype; the remaining slots are
 * filled with shared onigenc_* helpers.  Initializers are positional, so
 * their order must follow the member order of OnigEncodingType.
 */
OnigEncodingType OnigEncodingISO_8859_5 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-5", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -0,0 +1,97 @@
/**********************************************************************
iso8859_6.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Non-zero when the ctype entry for `code` has the CTYPE_TO_BIT(ctype) bit
   set.  `code` indexes a 256-entry table, so callers must keep it < 256. */
#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/*
 * Character-class bit sets for ISO-8859-6, one entry per byte value
 * (8 entries per row, so row r starts at byte 8*r).
 * ENC_IS_ISO_8859_6_CTYPE tests an entry against CTYPE_TO_BIT(ctype);
 * an entry of 0x0000 therefore matches no class at all.
 */
static const unsigned short EncISO_8859_6_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff */
0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
/*
 * Tell whether code point `code` belongs to character class `ctype`.
 * Code points of 256 and above are outside the table and belong to no class.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 256)
    return FALSE;

  return ENC_IS_ISO_8859_6_CTYPE(code, ctype);
}
/*
 * Encoding descriptor for ISO-8859-6.
 * Every character is one byte (max and min length are both 1).  This file
 * supplies only is_code_ctype; the three case-folding slots use the shared
 * onigenc_ascii_* helpers, as no case tables are defined here.
 * Initializers are positional, so their order must follow the member order
 * of OnigEncodingType.
 */
OnigEncodingType OnigEncodingISO_8859_6 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-6", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

223
src/Onigmo/enc/iso8859_7.c Normal file
View File

@ -0,0 +1,223 @@
/**********************************************************************
iso8859_7.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Lower-case form of byte c, read from EncISO_8859_7_ToLowerCaseTable. */
#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c]
/* Non-zero when the ctype entry for `code` has the CTYPE_TO_BIT(ctype) bit
   set.  `code` indexes a 256-entry table, so callers must keep it < 256. */
#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/*
 * Byte-to-byte case-folding table for ISO-8859-7, indexed by byte value
 * (8 entries per row, written as octal character constants).
 * Entries that differ from their own index: 'A'-'Z' -> 'a'-'z',
 * 0xb6 -> 0xdc, 0xb8-0xba -> 0xdd-0xdf, 0xbc -> 0xfc, 0xbe-0xbf -> 0xfd-0xfe,
 * and 0xc1-0xdb -> +0x20 except 0xd2, which maps to itself.
 * Every other byte maps to itself.
 */
static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
/* 0x00 - 0x7f */
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
/* 0x80 - 0x9f */
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
/* 0xa0 - 0xff */
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267',
'\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376',
'\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Character-class bit sets for ISO-8859-7, one entry per byte value
 * (8 entries per row, so row r starts at byte 8*r).
 * ENC_IS_ISO_8859_7_CTYPE tests an entry against CTYPE_TO_BIT(ctype);
 * an entry of 0x0000 therefore matches no class at all.
 */
static const unsigned short EncISO_8859_7_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff */
0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2,
0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000
};
/*
 * Case-fold the single byte at *pp through ENC_ISO_8859_7_TO_LOWER_CASE,
 * store it in *lower and step *pp past it.  Always produces one byte and
 * returns 1; `flag` and `end` are not consulted.
 */
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar ch = **pp;

  lower[0] = ENC_ISO_8859_7_TO_LOWER_CASE(ch);
  *pp += 1;
  return 1;
}
#if 0
/*
 * Disabled (compiled out by the surrounding #if 0).
 *
 * Reports whether the byte at *pp is case-ambiguous and steps *pp past it.
 * A byte is ambiguous when its ctype entry carries the upper or lower bit,
 * except 0xc0 and 0xe0, which are flagged lower but have no entry in
 * CaseFoldMap.  `flag` and `end` are not used.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
                 const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  (*pp)++;
  v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Was `v | BIT_CTYPE_LOWER`, which is non-zero for every byte and made
     the final return unreachable; the lower bit has to be tested with &. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    if (*p == 0xc0 || *p == 0xe0)
      return FALSE;
    else
      return TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Tell whether code point `code` belongs to character class `ctype`.
 * Code points of 256 and above are outside the table and belong to no class.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 256)
    return FALSE;

  return ENC_IS_ISO_8859_7_CTYPE(code, ctype);
}
/*
 * Case-fold pairs {from, to} for ISO-8859-7, handed to the
 * onigenc_*_with_map() helpers by apply_all_case_fold() and
 * get_case_fold_codes_by_str().
 * Apart from the entry flagged below, each pair repeats a non-identity entry
 * of EncISO_8859_7_ToLowerCaseTable above 0x7f (table[from] == to).
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xb6, 0xdc },
{ 0xb8, 0xdd },
{ 0xb9, 0xde },
{ 0xba, 0xdf },
{ 0xbc, 0xfc },
{ 0xbe, 0xfd },
{ 0xbf, 0xfe },
/* 0xc1-0xdb -> 0xe1-0xfb */
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
/* NOTE(review): EncISO_8859_7_ToLowerCaseTable maps 0xd2 to itself and
   EncISO_8859_7_CtypeTable[0xd2] is 0x0000, so this pair disagrees with
   both tables -- confirm whether it is intentional. */
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb }
};
/*
 * Hand this encoding's CaseFoldMap, together with `flag`, callback `f` and
 * its `arg`, to onigenc_apply_all_case_fold_with_map() and return its result.
 * NOTE(review): the third argument (0 here) is presumably the helper's
 * sharp-s switch -- confirm against the declaration in regenc.h.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
                    OnigApplyAllCaseFoldFunc f, void* arg)
{
  int status;

  status = onigenc_apply_all_case_fold_with_map(
             sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
             CaseFoldMap,
             0,
             flag, f, arg);
  return status;
}
/*
 * Forward the string [p, end) and the output array `items` to
 * onigenc_get_case_fold_codes_by_str_with_map() using this encoding's
 * CaseFoldMap, and return whatever the helper returns.
 * NOTE(review): the third argument (0 here) is presumably the helper's
 * sharp-s switch -- confirm against the declaration in regenc.h.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
                           const OnigUChar* p, const OnigUChar* end,
                           OnigCaseFoldCodeItem items[])
{
  int count;

  count = onigenc_get_case_fold_codes_by_str_with_map(
            sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
            CaseFoldMap,
            0,
            flag, p, end, items);
  return count;
}
/*
 * Encoding descriptor for ISO-8859-7.
 * Every character is one byte (max and min length are both 1).  Case folding
 * and ctype lookup use this file's mbc_case_fold, apply_all_case_fold,
 * get_case_fold_codes_by_str and is_code_ctype; the remaining slots are
 * filled with shared onigenc_* helpers.  Initializers are positional, so
 * their order must follow the member order of OnigEncodingType.
 */
OnigEncodingType OnigEncodingISO_8859_7 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-7", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -0,0 +1,97 @@
/**********************************************************************
iso8859_8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Non-zero when the ctype entry for `code` has the CTYPE_TO_BIT(ctype) bit
   set.  `code` indexes a 256-entry table, so callers must keep it < 256. */
#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/*
 * Character-class bit sets for ISO-8859-8, one entry per byte value
 * (8 entries per row, so row r starts at byte 8*r).
 * ENC_IS_ISO_8859_8_CTYPE tests an entry against CTYPE_TO_BIT(ctype);
 * an entry of 0x0000 therefore matches no class at all.
 */
static const unsigned short EncISO_8859_8_CtypeTable[256] = {
/* 0x00 - 0x7f */
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80 - 0x9f */
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
/* 0xa0 - 0xff */
0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
/*
 * Tell whether code point `code` belongs to character class `ctype`.
 * Code points of 256 and above are outside the table and belong to no class.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 256)
    return FALSE;

  return ENC_IS_ISO_8859_8_CTYPE(code, ctype);
}
/*
 * Encoding descriptor for ISO-8859-8.
 * Every character is one byte (max and min length are both 1).  This file
 * supplies only is_code_ctype; the three case-folding slots use the shared
 * onigenc_ascii_* helpers, as no case tables are defined here.
 * Initializers are positional, so their order must follow the member order
 * of OnigEncodingType.
 */
OnigEncodingType OnigEncodingISO_8859_8 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-8", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

229
src/Onigmo/enc/iso8859_9.c Normal file
View File

@ -0,0 +1,229 @@
/**********************************************************************
iso8859_9.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Look up the case-folded byte for c in EncISO_8859_9_ToLowerCaseTable.
 * c is used directly as the array index. */
#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c]
/* Non-zero when the EncISO_8859_9_CtypeTable entry for code has the bit
 * selected by CTYPE_TO_BIT(ctype) set.  code is used directly as the array
 * index, so the caller must keep it below 256. */
#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
((EncISO_8859_9_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/*
 * Byte -> case-folded byte, indexed by byte value (8 entries per row).
 * 'A'-'Z' (0x41-0x5a) map to 'a'-'z'; 0xc0-0xde map to 0xe0-0xfe except
 * 0xd7 and 0xdd, which map to themselves.  Every other byte, including
 * 0xdf, maps to itself.
 */
static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/*
 * Per-byte character-class bitmask, indexed by byte value (8 entries per
 * row).  Read through ENC_IS_ISO_8859_9_CTYPE, which tests the bit chosen
 * by CTYPE_TO_BIT(ctype); the bit layout itself is defined outside this file.
 */
static const unsigned short EncISO_8859_9_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
/*
 * Case-fold the single byte at *pp into `lower` and advance *pp by one.
 *
 * When the byte is 0xdf and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in
 * `flag`, the two bytes "ss" are written and 2 is returned.  Otherwise the
 * byte is translated through the lower-case table and 1 is returned.
 * `end` is not consulted.
 */
static int
mbc_case_fold(OnigCaseFoldType flag,
    const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar ch = **pp;
  int folded_len;

  *pp += 1;

  if (ch == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    lower[0] = 's';
    lower[1] = 's';
    folded_len = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_9_TO_LOWER_CASE(ch);
    folded_len = 1;
  }

  return folded_len;
}
/*
 * Disabled (compiled out by #if 0): reports whether the byte at *pp has a
 * case variant, advancing *pp by one.
 *
 * NOTE(review): `(v | BIT_CTYPE_LOWER) != 0` is true whenever
 * BIT_CTYPE_LOWER is non-zero, which would make the final return
 * unreachable; `&` was presumably intended.  The 0xaa-0xba exclusion range
 * also does not contain the 0xdf mentioned in the comment.  Confirm both
 * before re-enabling this code.
 */
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
/* 0xdf counts as ambiguous only when multi-char folding is requested. */
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
/* p still points at the byte just consumed. */
v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xdf etc.. are lower case letter, but can't convert. */
if (*p >= 0xaa && *p <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Report whether `code` belongs to character class `ctype`.
 * Codes of 256 and above are outside the table and always yield FALSE.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_ISO_8859_9_CTYPE(code, ctype) : FALSE;
}
/*
 * Case pairs for the non-ASCII letters: 0xc0-0xde paired with 0xe0-0xfe.
 * 0xd7 has no entry.  Passed to the *_with_map helpers by
 * apply_all_case_fold() and get_case_fold_codes_by_str().
 *
 * NOTE(review): this map pairs 0xdd with 0xfd, whereas
 * EncISO_8859_9_ToLowerCaseTable maps 0xdd to itself - confirm which of the
 * two is intended.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
/*
 * Hand every pair in CaseFoldMap to the shared map-driven helper, which
 * invokes `f` with `arg`; the helper's result is returned unchanged.
 * The third argument is 1 here - presumably it enables the helper's
 * handling of 0xdf; confirm against the helper's definition.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
    OnigApplyAllCaseFoldFunc f, void* arg)
{
  return onigenc_apply_all_case_fold_with_map(
      sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
      CaseFoldMap,
      1,
      flag, f, arg);
}
/*
 * Fill `items` with the case-fold alternatives for the character at `p`
 * by delegating to the shared map-driven helper with CaseFoldMap.
 * The helper's return value is passed through unchanged.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  return onigenc_get_case_fold_codes_by_str_with_map(
      sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
      CaseFoldMap,
      1,
      flag, p, end, items);
}
/*
 * Encoding descriptor for ISO-8859-9.  It combines the generic single-byte
 * and newline helpers (onigenc_*) with this file's own case-fold and ctype
 * routines.  The initializer is positional, so entry order must follow the
 * OnigEncodingType member order.
 */
OnigEncodingType OnigEncodingISO_8859_9 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-9", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

251
src/Onigmo/enc/koi8.c Normal file
View File

@ -0,0 +1,251 @@
/**********************************************************************
koi8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Look up the case-folded byte for c in EncKOI8_ToLowerCaseTable.
 * c is used directly as the array index. */
#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c]
/* Non-zero when the EncKOI8_CtypeTable entry for code has the bit selected
 * by CTYPE_TO_BIT(ctype) set.  code is used directly as the array index, so
 * the caller must keep it below 256. */
#define ENC_IS_KOI8_CTYPE(code,ctype) \
((EncKOI8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/*
 * Byte -> case-folded byte, indexed by byte value (8 entries per row).
 * 'A'-'Z' (0x41-0x5a) map to 'a'-'z' and 0xe0-0xff map to 0xc0-0xdf.
 * Every other byte maps to itself.
 */
static const UChar EncKOI8_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
/*
 * Per-byte character-class bitmask, indexed by byte value (8 entries per
 * row).  Read through ENC_IS_KOI8_CTYPE, which tests the bit chosen by
 * CTYPE_TO_BIT(ctype); the bit layout itself is defined outside this file.
 */
static const unsigned short EncKOI8_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
};
/*
 * Case-fold the single byte at *pp through the KOI8 lower-case table,
 * store it in lower[0], advance *pp by one and return 1 (bytes written).
 * `flag` and `end` are not consulted.
 */
static int
koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
    const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar src = **pp;

  *pp += 1;
  lower[0] = ENC_KOI8_TO_LOWER_CASE(src);
  return 1;
}
/*
 * Disabled (compiled out by #if 0): advances *pp by one and reports whether
 * the consumed byte carries an upper- or lower-case ctype bit.  This is only
 * checked when the matching ASCII / non-ASCII case-fold flag is set in
 * `flag`; otherwise FALSE is returned.
 */
#if 0
static int
koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
{
const OnigUChar* p = *pp;
(*pp)++;
/* p still points at the byte just consumed. */
if (((flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncKOI8_CtypeTable[*p] &
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
/*
 * Report whether `code` belongs to character class `ctype`.
 * Codes of 256 and above are outside the table and always yield FALSE.
 */
static int
koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_KOI8_CTYPE(code, ctype) : FALSE;
}
/*
 * Case pairs for the KOI8 letters in 0xc0-0xff.
 *
 * The first 32 rows pair 0xc0-0xdf with 0xe0-0xff; the second 32 rows list
 * the same pairs in the opposite order (0xe0-0xff with 0xc0-0xdf), matching
 * the direction used by EncKOI8_ToLowerCaseTable.
 *
 * The second half previously jumped from 0xfc to 0xfe, omitting
 * { 0xfd, 0xdd }.  That row is now present so both halves cover the same
 * 32 pairs.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  { 0xc0, 0xe0 },
  { 0xc1, 0xe1 },
  { 0xc2, 0xe2 },
  { 0xc3, 0xe3 },
  { 0xc4, 0xe4 },
  { 0xc5, 0xe5 },
  { 0xc6, 0xe6 },
  { 0xc7, 0xe7 },
  { 0xc8, 0xe8 },
  { 0xc9, 0xe9 },
  { 0xca, 0xea },
  { 0xcb, 0xeb },
  { 0xcc, 0xec },
  { 0xcd, 0xed },
  { 0xce, 0xee },
  { 0xcf, 0xef },
  { 0xd0, 0xf0 },
  { 0xd1, 0xf1 },
  { 0xd2, 0xf2 },
  { 0xd3, 0xf3 },
  { 0xd4, 0xf4 },
  { 0xd5, 0xf5 },
  { 0xd6, 0xf6 },
  { 0xd7, 0xf7 },
  { 0xd8, 0xf8 },
  { 0xd9, 0xf9 },
  { 0xda, 0xfa },
  { 0xdb, 0xfb },
  { 0xdc, 0xfc },
  { 0xdd, 0xfd },
  { 0xde, 0xfe },
  { 0xdf, 0xff },

  { 0xe0, 0xc0 },
  { 0xe1, 0xc1 },
  { 0xe2, 0xc2 },
  { 0xe3, 0xc3 },
  { 0xe4, 0xc4 },
  { 0xe5, 0xc5 },
  { 0xe6, 0xc6 },
  { 0xe7, 0xc7 },
  { 0xe8, 0xc8 },
  { 0xe9, 0xc9 },
  { 0xea, 0xca },
  { 0xeb, 0xcb },
  { 0xec, 0xcc },
  { 0xed, 0xcd },
  { 0xee, 0xce },
  { 0xef, 0xcf },
  { 0xf0, 0xd0 },
  { 0xf1, 0xd1 },
  { 0xf2, 0xd2 },
  { 0xf3, 0xd3 },
  { 0xf4, 0xd4 },
  { 0xf5, 0xd5 },
  { 0xf6, 0xd6 },
  { 0xf7, 0xd7 },
  { 0xf8, 0xd8 },
  { 0xf9, 0xd9 },
  { 0xfa, 0xda },
  { 0xfb, 0xdb },
  { 0xfc, 0xdc },
  { 0xfd, 0xdd },
  { 0xfe, 0xde },
  { 0xff, 0xdf }
};
/*
 * Hand every pair in CaseFoldMap to the shared map-driven helper, which
 * invokes `f` with `arg`; the helper's result is returned unchanged.
 */
static int
koi8_apply_all_case_fold(OnigCaseFoldType flag,
    OnigApplyAllCaseFoldFunc f, void* arg)
{
  return onigenc_apply_all_case_fold_with_map(
      sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
      CaseFoldMap,
      0,
      flag, f, arg);
}
/*
 * Fill `items` with the case-fold alternatives for the character at `p`
 * by delegating to the shared map-driven helper with CaseFoldMap.
 * The helper's return value is passed through unchanged.
 */
static int
koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  return onigenc_get_case_fold_codes_by_str_with_map(
      sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
      CaseFoldMap,
      0,
      flag, p, end, items);
}
/*
 * Encoding descriptor for KOI8.  It combines the generic single-byte and
 * newline helpers (onigenc_*) with this file's koi8_* case-fold and ctype
 * routines.  The initializer is positional, so entry order must follow the
 * OnigEncodingType member order.
 */
OnigEncodingType OnigEncodingKOI8 = {
onigenc_single_byte_mbc_enc_len,
"KOI8", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
koi8_mbc_case_fold,
koi8_apply_all_case_fold,
koi8_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
koi8_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

213
src/Onigmo/enc/koi8_r.c Normal file
View File

@ -0,0 +1,213 @@
/**********************************************************************
koi8_r.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Look up the case-folded byte for c in EncKOI8_R_ToLowerCaseTable.
 * c is used directly as the array index. */
#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c]
/* Non-zero when the EncKOI8_R_CtypeTable entry for code has the bit selected
 * by CTYPE_TO_BIT(ctype) set.  code is used directly as the array index, so
 * the caller must keep it below 256. */
#define ENC_IS_KOI8_R_CTYPE(code,ctype) \
((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/*
 * Byte -> case-folded byte, indexed by byte value (8 entries per row).
 * 'A'-'Z' (0x41-0x5a) map to 'a'-'z', 0xb3 maps to 0xa3 and 0xe0-0xff map
 * to 0xc0-0xdf.  Every other byte maps to itself.
 */
static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
/*
 * Per-byte character-class bitmask, indexed by byte value (8 entries per
 * row).  Read through ENC_IS_KOI8_R_CTYPE, which tests the bit chosen by
 * CTYPE_TO_BIT(ctype); the bit layout itself is defined outside this file.
 */
static const unsigned short EncKOI8_R_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
};
/*
 * Case-fold the single byte at *pp through the KOI8-R lower-case table,
 * store it in lower[0], advance *pp by one and return 1 (bytes written).
 * `flag` and `end` are not consulted.
 */
static int
koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
    const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar src = **pp;

  *pp += 1;
  lower[0] = ENC_KOI8_R_TO_LOWER_CASE(src);
  return 1;
}
/*
 * Disabled (compiled out by #if 0): advances *pp by one and returns TRUE
 * when the consumed byte carries an upper- or lower-case ctype bit.
 * `flag` and `end` are not consulted.
 */
#if 0
static int
koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
(*pp)++;
/* p still points at the byte just consumed. */
v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
#endif
/*
 * Report whether `code` belongs to character class `ctype`.
 * Codes of 256 and above are outside the table and always yield FALSE.
 */
static int
koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  return (code < 256) ? ENC_IS_KOI8_R_CTYPE(code, ctype) : FALSE;
}
/*
 * Case pairs for KOI8-R: 0xa3 paired with 0xb3, and 0xc0-0xdf paired with
 * 0xe0-0xff.  In each row the first value is the byte that
 * EncKOI8_R_ToLowerCaseTable folds the second value to.  Passed to the
 * *_with_map helpers by koi8_r_apply_all_case_fold() and
 * koi8_r_get_case_fold_codes_by_str().
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa3, 0xb3 },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe },
{ 0xdf, 0xff }
};
/*
 * Hand every pair in CaseFoldMap to the shared map-driven helper, which
 * invokes `f` with `arg`; the helper's result is returned unchanged.
 */
static int
koi8_r_apply_all_case_fold(OnigCaseFoldType flag,
    OnigApplyAllCaseFoldFunc f, void* arg)
{
  return onigenc_apply_all_case_fold_with_map(
      sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
      CaseFoldMap,
      0,
      flag, f, arg);
}
/*
 * Fill `items` with the case-fold alternatives for the character at `p`
 * by delegating to the shared map-driven helper with CaseFoldMap.
 * The helper's return value is passed through unchanged.
 */
static int
koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  return onigenc_get_case_fold_codes_by_str_with_map(
      sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
      CaseFoldMap,
      0,
      flag, p, end, items);
}
/*
 * Encoding descriptor for KOI8-R.  It combines the generic single-byte and
 * newline helpers (onigenc_*) with this file's koi8_r_* case-fold and ctype
 * routines.  The initializer is positional, so entry order must follow the
 * OnigEncodingType member order.
 */
OnigEncodingType OnigEncodingKOI8_R = {
onigenc_single_byte_mbc_enc_len,
"KOI8-R", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
koi8_r_mbc_case_fold,
koi8_r_apply_all_case_fold,
koi8_r_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
koi8_r_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

1162
src/Onigmo/enc/mktable.c Normal file

File diff suppressed because it is too large Load Diff

571
src/Onigmo/enc/sjis.c Normal file
View File

@ -0,0 +1,571 @@
/**********************************************************************
sjis.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/* Choose what ONIG_ENCODING_SELF refers to in this translation unit:
 * ONIG_ENCODING_CP932 when ENC_CP932 is defined, ONIG_ENCODING_SJIS
 * otherwise. */
#ifdef ENC_CP932
#define ONIG_ENCODING_SELF ONIG_ENCODING_CP932
#else
#define ONIG_ENCODING_SELF ONIG_ENCODING_SJIS
#endif
/*
 * Encoded character length keyed by first byte (16 entries per row, 256 in
 * total).  Bytes 0x81-0x9f and 0xe0-0xfc start a 2-byte character; every
 * other byte value is a 1-byte character.
 */
static const int EncLen_SJIS[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
};
/*
 * 1 when the byte value may appear as the second byte of a 2-byte
 * character, 0 otherwise (16 entries per row).  The 1 entries cover
 * 0x40-0x7e and 0x80-0xfc.
 */
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
/*
 * Case pairs for the 2-byte letters, as { upper, lower } code values.
 * The offsets match get_lower_case(): +0x21 for the fullwidth alphabet,
 * +0x20 for Greek, and +0x30 for Cyrillic up to 0x844e, then +0x31 from
 * 0x844f onward (the lower-case run skips 0x847f).
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
/* Fullwidth Alphabet */
{ 0x8260, 0x8281 },
{ 0x8261, 0x8282 },
{ 0x8262, 0x8283 },
{ 0x8263, 0x8284 },
{ 0x8264, 0x8285 },
{ 0x8265, 0x8286 },
{ 0x8266, 0x8287 },
{ 0x8267, 0x8288 },
{ 0x8268, 0x8289 },
{ 0x8269, 0x828a },
{ 0x826a, 0x828b },
{ 0x826b, 0x828c },
{ 0x826c, 0x828d },
{ 0x826d, 0x828e },
{ 0x826e, 0x828f },
{ 0x826f, 0x8290 },
{ 0x8270, 0x8291 },
{ 0x8271, 0x8292 },
{ 0x8272, 0x8293 },
{ 0x8273, 0x8294 },
{ 0x8274, 0x8295 },
{ 0x8275, 0x8296 },
{ 0x8276, 0x8297 },
{ 0x8277, 0x8298 },
{ 0x8278, 0x8299 },
{ 0x8279, 0x829a },
/* Greek */
{ 0x839f, 0x83bf },
{ 0x83a0, 0x83c0 },
{ 0x83a1, 0x83c1 },
{ 0x83a2, 0x83c2 },
{ 0x83a3, 0x83c3 },
{ 0x83a4, 0x83c4 },
{ 0x83a5, 0x83c5 },
{ 0x83a6, 0x83c6 },
{ 0x83a7, 0x83c7 },
{ 0x83a8, 0x83c8 },
{ 0x83a9, 0x83c9 },
{ 0x83aa, 0x83ca },
{ 0x83ab, 0x83cb },
{ 0x83ac, 0x83cc },
{ 0x83ad, 0x83cd },
{ 0x83ae, 0x83ce },
{ 0x83af, 0x83cf },
{ 0x83b0, 0x83d0 },
{ 0x83b1, 0x83d1 },
{ 0x83b2, 0x83d2 },
{ 0x83b3, 0x83d3 },
{ 0x83b4, 0x83d4 },
{ 0x83b5, 0x83d5 },
{ 0x83b6, 0x83d6 },
/* Cyrillic */
{ 0x8440, 0x8470 },
{ 0x8441, 0x8471 },
{ 0x8442, 0x8472 },
{ 0x8443, 0x8473 },
{ 0x8444, 0x8474 },
{ 0x8445, 0x8475 },
{ 0x8446, 0x8476 },
{ 0x8447, 0x8477 },
{ 0x8448, 0x8478 },
{ 0x8449, 0x8479 },
{ 0x844a, 0x847a },
{ 0x844b, 0x847b },
{ 0x844c, 0x847c },
{ 0x844d, 0x847d },
{ 0x844e, 0x847e },
{ 0x844f, 0x8480 },
{ 0x8450, 0x8481 },
{ 0x8451, 0x8482 },
{ 0x8452, 0x8483 },
{ 0x8453, 0x8484 },
{ 0x8454, 0x8485 },
{ 0x8455, 0x8486 },
{ 0x8456, 0x8487 },
{ 0x8457, 0x8488 },
{ 0x8458, 0x8489 },
{ 0x8459, 0x848a },
{ 0x845a, 0x848b },
{ 0x845b, 0x848c },
{ 0x845c, 0x848d },
{ 0x845d, 0x848e },
{ 0x845e, 0x848f },
{ 0x845f, 0x8490 },
{ 0x8460, 0x8491 },
};
/* True when `byte` starts a multi-byte character (EncLen_SJIS entry > 1).
 * `byte` is used directly as the array index. */
#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1)
/* Non-zero when `byte` may be the second byte of a 2-byte character. */
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
/* Return the encoded length (1 or 2) of the character whose first byte is
 * at `p`, as given by EncLen_SJIS. */
static int
mbc_enc_len(const UChar* p)
{
  const UChar lead = p[0];

  return EncLen_SJIS[lead];
}
/*
 * Return the number of bytes needed to encode `code`.
 *
 *   code > 0xffff           -> ONIGERR_INVALID_CODE_POINT_VALUE
 *   0x100 <= code <= 0xffff -> 2 when the low byte can be a trail byte,
 *                              otherwise ONIGERR_INVALID_CODE_POINT_VALUE
 *   code < 0x100            -> 1 when the value is a single-byte character,
 *                              0 when it is a lead byte on its own
 *
 * The high byte of a two-byte code is not checked.
 */
static int
code_to_mbclen(OnigCodePoint code)
{
  if (code > 0xffff)
    return ONIGERR_INVALID_CODE_POINT_VALUE;

  if (code >= 256) {
    const int trail = code & 0xff;

    return SJIS_ISMB_TRAIL(trail) ? 2 : ONIGERR_INVALID_CODE_POINT_VALUE;
  }

  return (EncLen_SJIS[(int )code] == 1) ? 1 : 0;
}
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
len = mbc_enc_len(p);
c = *p++;
n = c;
if (len == 1) return n;
for (i = 1; i < len; i++) {
if (p >= end) break;
c = *p++;
n <<= 8; n += c;
}
return n;
}
/*
 * Encode `code` into `buf` and return the number of bytes written.
 * Bits 8-15 are emitted first when they are non-zero, followed by the low
 * byte, so the result is 1 or 2 bytes.  No validity check is performed
 * (the one below is compiled out).
 */
static int
code_to_mbc(OnigCodePoint code, UChar *buf)
{
  const UChar high = (UChar )((code >> 8) & 0xff);
  int written = 0;

  if (high != 0)
    buf[written++] = high;
  buf[written++] = (UChar )(code & 0xff);

#if 0
  if (mbc_enc_len(buf) != written)
    return REGERR_INVALID_CODE_POINT_VALUE;
#endif
  return written;
}
/*
 * Hand every pair in CaseFoldMap to the shared map-driven helper, which
 * invokes `f` with `arg`; the helper's result is returned unchanged.
 */
static int
apply_all_case_fold(OnigCaseFoldType flag,
    OnigApplyAllCaseFoldFunc f, void* arg)
{
  return onigenc_apply_all_case_fold_with_map(
      sizeof(CaseFoldMap) / sizeof(CaseFoldMap[0]),
      CaseFoldMap,
      0,
      flag, f, arg);
}
/*
 * Return the lower-case counterpart of a 2-byte upper-case letter, or
 * `code` itself when it is not one of the upper-case ranges handled here.
 */
static OnigCodePoint
get_lower_case(OnigCodePoint code)
{
  OnigCodePoint offset = 0;

  if (ONIGENC_IS_IN_RANGE(code, 0x8260, 0x8279)) {
    /* Fullwidth Alphabet */
    offset = 0x0021;
  }
  else if (ONIGENC_IS_IN_RANGE(code, 0x839f, 0x83b6)) {
    /* Greek */
    offset = 0x0020;
  }
  else if (ONIGENC_IS_IN_RANGE(code, 0x8440, 0x8460)) {
    /* Cyrillic: from 0x844f on, the lower-case run skips 0x847f */
    offset = (code >= 0x844f) ? 0x0031 : 0x0030;
  }

  return (OnigCodePoint )(code + offset);
}
/*
 * Return the upper-case counterpart of a 2-byte lower-case letter, or
 * `code` itself when it is not one of the lower-case ranges handled here.
 *
 * This is the inverse of get_lower_case() and agrees with CaseFoldMap:
 *   Fullwidth alphabet  0x8281-0x829a -> code - 0x21
 *   Greek               0x83bf-0x83d6 -> code - 0x20
 *   Cyrillic            0x8470-0x847e -> code - 0x30
 *                       0x8480-0x8491 -> code - 0x31
 */
static OnigCodePoint
get_upper_case(OnigCodePoint code)
{
  if (ONIGENC_IS_IN_RANGE(code, 0x8281, 0x829a)) {
    /* Fullwidth Alphabet */
    return (OnigCodePoint )(code - 0x0021);
  }
  else if (ONIGENC_IS_IN_RANGE(code, 0x83bf, 0x83d6)) {
    /* Greek */
    return (OnigCodePoint )(code - 0x0020);
  }
  else if (ONIGENC_IS_IN_RANGE(code, 0x8470, 0x847e) ||
           ONIGENC_IS_IN_RANGE(code, 0x8480, 0x8491)) {
    /* Cyrillic: the lower-case run skips 0x847f, so codes from 0x8480 on
     * sit one further from their upper-case partners (0x31, not 0x30).
     * The distance therefore grows by d; the previous `0x0030 - d` mapped
     * 0x8480 to 0x8451 instead of 0x844f. */
    int d = (code >= 0x8480) ? 1 : 0;
    return (OnigCodePoint )(code - (0x0030 + d));
  }
  return code;
}
/*
 * Report the single case-fold alternative for the character at `p`.
 *
 * ASCII characters are delegated to the ASCII helper.  For other
 * characters the lower-case mapping is tried first, then the upper-case
 * one.  If either differs from the character itself, items[0] is filled
 * with that code and 1 is returned; otherwise 0 is returned and `items` is
 * left untouched.
 */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end,
    OnigCaseFoldCodeItem items[])
{
  OnigCodePoint self, partner;

  self = mbc_to_code(p, end);
  if (ONIGENC_IS_ASCII_CODE(self))
    return onigenc_ascii_get_case_fold_codes_by_str(flag, p, end, items);

  partner = get_lower_case(self);
  if (partner == self)
    partner = get_upper_case(self);
  if (partner == self)
    return 0;

  items[0].byte_len = mbc_enc_len(p);
  items[0].code_len = 1;
  items[0].code[0] = partner;
  return 1;
}
/*
 * Write the lower-case form of the character at *pp into lower, advance
 * *pp, and return the number of bytes written.
 */
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
	      const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
  const UChar* src = *pp;
  OnigCodePoint folded;
  int written;

  if (ONIGENC_IS_MBC_ASCII(src)) {
    /* Single ASCII byte: table-driven lower-casing. */
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
    (*pp)++;
    return 1;
  }

  folded  = get_lower_case(mbc_to_code(src, end));
  written = code_to_mbc(folded, lower);
  (*pp) += written;
  return written; /* return byte length of converted char to lower */
}
#if 0
/* Disabled code (never compiled): would forward the ambiguity check to
 * the generic multi-byte helper. */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SELF, flag, pp, end);
}
#endif
#if 0
/* Disabled code (never compiled): an earlier is_code_ctype() that handles
 * only the standard ctypes.  The active definition further down in this
 * file also handles the property-list ctypes. */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
}
return FALSE;
}
#endif
/*
 * Given a position s inside the buffer that begins at start, return the
 * address of the first byte of the character that contains s.
 */
static UChar*
left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *head = s;
  int width;

  if (s <= start) return (UChar* )s;

  if (SJIS_ISMB_TRAIL(*head)) {
    /* Walk back over the run of bytes that could be lead bytes; stop
     * just after the first byte that cannot be one. */
    while (head > start) {
      head--;
      if (! SJIS_ISMB_FIRST(*head)) {
	head++;
	break;
      }
    }
  }

  width = mbc_enc_len(head);
  if (head + width > s) return (UChar* )head;

  /* The remaining bytes up to s pair off into two-byte characters;
   * round the distance down to an even number. */
  head += width;
  return (UChar* )(head + ((s - head) & ~1));
}
/*
 * Return FALSE when the byte at s could be the trailing byte of a
 * two-byte character, TRUE otherwise.
 */
static int
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
  const UChar byte = *s;

  if (SJIS_ISMB_TRAIL(byte))
    return FALSE;
  return TRUE;
}
/* Set to 1 at the end of init_property_list(), after all properties are added. */
static int PropertyInited = 0;
/* Code-range tables for the extra properties; indexed by
 * (ctype - (ONIGENC_MAX_STD_CTYPE + 1)) in is_code_ctype() and
 * get_ctype_code_range(). */
static const OnigCodePoint** PropertyList;
/* Number of entries in PropertyList; used as the upper bound for lookups. */
static int PropertyListNum;
/* NOTE(review): presumably the allocated capacity of PropertyList, maintained
 * by PROPERTY_LIST_ADD_PROP -- confirm against the macro definition. */
static int PropertyListSize;
/* Name -> ctype table consulted by property_name_to_ctype(). */
static hash_table_type* PropertyNameTable;
/*
 * Code-range tables for the script properties registered in
 * init_property_list().  Each table starts with the number of ranges,
 * followed by that many (low, high) code point pairs.
 */
static const OnigCodePoint CR_Hiragana[] = {
1,
0x829f, 0x82f1
}; /* CR_Hiragana */
/* Katakana: two single-byte ranges followed by two double-byte ranges. */
static const OnigCodePoint CR_Katakana[] = {
4,
0x00a6, 0x00af,
0x00b1, 0x00dd,
0x8340, 0x837e,
0x8380, 0x8396,
}; /* CR_Katakana */
/* Han: the ENC_CP932 build adds two extended-character ranges to the
 * four ranges used otherwise. */
#ifdef ENC_CP932
static const OnigCodePoint CR_Han[] = {
6,
0x8157, 0x8157,
0x889f, 0x9872, /* Kanji level 1 */
0x989f, 0x9ffc, /* Kanji level 2 */
0xe040, 0xeaa4, /* Kanji level 2 */
0xed40, 0xeeec, /* NEC-selected IBM extended characters (without symbols) */
0xfa5c, 0xfc4b, /* IBM extended characters (without symbols) */
}; /* CR_Han */
#else
static const OnigCodePoint CR_Han[] = {
4,
0x8157, 0x8157,
0x889f, 0x9872, /* Kanji level 1 */
0x989f, 0x9ffc, /* Kanji level 2 */
0xe040, 0xeaa4, /* Kanji level 2 */
}; /* CR_Han */
#endif
/* Latin: ASCII upper/lower plus the fullwidth upper/lower ranges that
 * get_lower_case()/get_upper_case() convert between. */
static const OnigCodePoint CR_Latin[] = {
4,
0x0041, 0x005a,
0x0061, 0x007a,
0x8260, 0x8279,
0x8281, 0x829a,
}; /* CR_Latin */
/* Greek: upper-case range then lower-case range. */
static const OnigCodePoint CR_Greek[] = {
2,
0x839f, 0x83b6,
0x83bf, 0x83d6,
}; /* CR_Greek */
/* Cyrillic: upper-case range then the two lower-case ranges.
 * NOTE(review): the second range ends at 0x847f while get_upper_case()
 * treats 0x847e as the last lower-case letter of that run -- confirm
 * whether 0x847f is intended here. */
static const OnigCodePoint CR_Cyrillic[] = {
3,
0x8440, 0x8460,
0x8470, 0x847f,
0x8480, 0x8491,
}; /* CR_Cyrillic */
/*
 * Register the six script properties of this encoding and mark the list
 * as initialized.  Returns r.
 * NOTE(review): r is never assigned directly here and the `end` label is
 * never jumped to directly, so PROPERTY_LIST_ADD_PROP presumably assigns
 * r and does `goto end` on failure -- confirm against the macro.
 */
static int
init_property_list(void)
{
int r;
PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana);
PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana);
PROPERTY_LIST_ADD_PROP("han", CR_Han);
PROPERTY_LIST_ADD_PROP("latin", CR_Latin);
PROPERTY_LIST_ADD_PROP("greek", CR_Greek);
PROPERTY_LIST_ADD_PROP("cyrillic", CR_Cyrillic);
/* Reached only when control falls through every registration above. */
PropertyInited = 1;
end:
return r;
}
/*
 * Translate the property name in [p, end) into a ctype value.  The name
 * is lower-cased with the ASCII table into a temporary buffer and looked
 * up in PropertyNameTable; names not found there are passed on to
 * onigenc_minimum_property_name_to_ctype().
 */
static int
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
  hash_data_type ctype;
  UChar *lowered, *tail;

  PROPERTY_LIST_INIT_CHECK;

  lowered = tail = xalloca(end - p + 1);
  while (p < end) {
    *tail++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    p++;
  }

  if (onig_st_lookup_strend(PropertyNameTable, lowered, tail, &ctype) != 0)
    return (int )ctype;

  return onigenc_minimum_property_name_to_ctype(enc, lowered, tail);
}
/*
 * Test whether code belongs to the character type ctype.
 * Standard ctypes: ASCII codes use the ASCII ctype table; any other code
 * matches only word/graph/print types, and only when it encodes to more
 * than one byte.  Larger ctype values index the property list.
 */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (ctype > ONIGENC_MAX_STD_CTYPE) {
    /* Property ctype: convert to an index into PropertyList. */
    PROPERTY_LIST_INIT_CHECK;

    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
    if (ctype >= (unsigned int )PropertyListNum)
      return ONIGERR_TYPE_BUG;

    return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
  }

  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if (CTYPE_IS_WORD_GRAPH_PRINT(ctype))
    return (code_to_mbclen(code) > 1 ? TRUE : FALSE);

  return FALSE;
}
/*
 * Provide the code-range table for a property ctype through *ranges and
 * set *sb_out to 0x80.  Standard ctypes are not served here and yield
 * ONIG_NO_SUPPORT_CONFIG; an out-of-range property index yields
 * ONIGERR_TYPE_BUG.  Returns 0 on success.
 */
static int
get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
		     const OnigCodePoint* ranges[])
{
  if (ctype <= ONIGENC_MAX_STD_CTYPE)
    return ONIG_NO_SUPPORT_CONFIG;

  *sb_out = 0x80;

  PROPERTY_LIST_INIT_CHECK;

  ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
  if (ctype >= (OnigCtype )PropertyListNum)
    return ONIGERR_TYPE_BUG;

  *ranges = PropertyList[ctype];
  return 0;
}
/*
 * Encoding descriptor exported by this file.  The same set of functions
 * is published as "CP932" when ENC_CP932 is defined and as "Shift_JIS"
 * otherwise; only the variable name and the name string differ.
 */
#ifdef ENC_CP932
OnigEncodingType OnigEncodingCP932 = {
mbc_enc_len,
"CP932", /* name */
2, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
mbc_to_code,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};
#else
OnigEncodingType OnigEncodingSJIS = {
mbc_enc_len,
"Shift_JIS", /* name */
2, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
mbc_to_code,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};
#endif

677
src/Onigmo/enc/unicode.c Normal file
View File

@ -0,0 +1,677 @@
/**********************************************************************
unicode.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/* Non-zero when the table entry for code has the bit for ctype set.
 * code indexes the 256-entry table directly, so it must be below 256. */
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/* Disabled variant that takes an already-computed bit mask. */
#if 0
#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \
((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0)
#endif
/* Per-code ctype bit sets for code points 0x00..0xff (eight entries per
 * row); tested through ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE. */
static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
#include "enc/unicode/name2ctype.h"
/* A list of up to three code points; n is the number of entries in use. */
typedef struct {
int n;
OnigCodePoint code[3];
} CodePointList3;
/* Fold entry: one source code point and the code point(s) it folds to. */
typedef struct {
OnigCodePoint from;
CodePointList3 to;
} CaseFold_11_Type;
/* Unfold entry keyed by one code point, listing up to three related
 * code points. */
typedef struct {
OnigCodePoint from;
CodePointList3 to;
} CaseUnfold_11_Type;
/* A list of up to two code points; n is the number of entries in use. */
typedef struct {
int n;
OnigCodePoint code[2];
} CodePointList2;
/* Unfold entry keyed by a sequence of two code points. */
typedef struct {
OnigCodePoint from[2];
CodePointList2 to;
} CaseUnfold_12_Type;
/* Unfold entry keyed by a sequence of three code points. */
typedef struct {
OnigCodePoint from[3];
CodePointList2 to;
} CaseUnfold_13_Type;
#include "enc/unicode/casefold.h"
/* Element count of a true array, as an int.  Must not be applied to a
 * pointer. */
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
/* Number of entries in CodeRanges; the exclusive upper bound for ctype. */
#define CODE_RANGES_NUM numberof(CodeRanges)
/*
 * Test whether code belongs to the character type ctype.
 * Codes below 256 are answered from the ISO-8859-1 ctype table (when
 * USE_UNICODE_PROPERTIES is defined, only for the standard ctypes).
 * Everything else is answered from CodeRanges[ctype]; a ctype outside
 * that table yields ONIGERR_TYPE_BUG.
 */
extern int
onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  int use_latin1_table = (code < 256);

#ifdef USE_UNICODE_PROPERTIES
  if (ctype > ONIGENC_MAX_STD_CTYPE)
    use_latin1_table = 0;
#endif

  if (use_latin1_table)
    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);

  if (ctype >= CODE_RANGES_NUM)
    return ONIGERR_TYPE_BUG;

  return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
}
/*
 * Store the code-range table for ctype in *ranges.
 * Returns 0 on success, or ONIGERR_TYPE_BUG when ctype is not a valid
 * index into CodeRanges.
 */
extern int
onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
{
  /* ctype is signed: a negative value (for example a large unsigned
   * ctype converted to int by a caller) must be rejected as well, or it
   * would index before the start of CodeRanges. */
  if (ctype < 0 || ctype >= CODE_RANGES_NUM) {
    return ONIGERR_TYPE_BUG;
  }

  *ranges = CodeRanges[ctype];

  return 0;
}
/*
 * Code-range lookup for the UTF-16/UTF-32 encodings: sets *sb_out to
 * 0x00 and then defers to onigenc_unicode_ctype_code_range().
 */
extern int
onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
				      const OnigCodePoint* ranges[])
{
  int r;

  *sb_out = 0x00;
  r = onigenc_unicode_ctype_code_range(ctype, ranges);
  return r;
}
#include "st.h"
/* Size of the buffer that holds a normalized property name: the longest
 * accepted name (MAX_WORD_LENGTH) plus the terminating NUL. */
#define PROPERTY_NAME_MAX_SIZE (MAX_WORD_LENGTH + 1)
/*
 * Translate the property name in [name, end) into a ctype value.
 * The name is normalized first: spaces, hyphens and underscores are
 * dropped and the remaining characters are lower-cased with the ASCII
 * table.  A non-ASCII character, an over-long name, or a name unknown to
 * uniname2ctype() yields ONIGERR_INVALID_CHAR_PROPERTY_NAME.
 */
extern int
onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
{
  UChar normalized[PROPERTY_NAME_MAX_SIZE];
  UChar *cur;
  int used = 0;
  int ctype;

  for (cur = name; cur < end; cur += enclen(enc, cur)) {
    const OnigCodePoint ch = ONIGENC_MBC_TO_CODE(enc, cur, end);

    if (ch == ' ' || ch == '-' || ch == '_')
      continue;			/* separators are not significant */
    if (ch >= 0x80)
      return ONIGERR_INVALID_CHAR_PROPERTY_NAME;

    normalized[used++] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(ch);
    /* Keep one slot free for the terminating NUL written below. */
    if (used >= PROPERTY_NAME_MAX_SIZE)
      return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
  }
  normalized[used] = 0;

  ctype = uniname2ctype(normalized, used);
  if (ctype < 0)
    return ONIGERR_INVALID_CHAR_PROPERTY_NAME;

  return ctype;
}
/* Key comparison for two-code-point keys: 0 when both elements are
 * equal, 1 otherwise. */
static int
code2_cmp(OnigCodePoint* x, OnigCodePoint* y)
{
  const int same = (x[0] == y[0] && x[1] == y[1]);

  return same ? 0 : 1;
}
/* Hash for two-code-point keys: the sum of both elements. */
static int
code2_hash(OnigCodePoint* x)
{
  const OnigCodePoint sum = x[0] + x[1];

  return (int )sum;
}
/* Compare/hash pair for tables keyed by two code points
 * (used when creating Unfold2Table). */
static struct st_hash_type type_code2_hash = {
code2_cmp,
code2_hash,
};
/* Key comparison for three-code-point keys: 0 when all three elements
 * are equal, 1 otherwise. */
static int
code3_cmp(OnigCodePoint* x, OnigCodePoint* y)
{
  const int same = (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]);

  return same ? 0 : 1;
}
/* Hash for three-code-point keys: the sum of all three elements. */
static int
code3_hash(OnigCodePoint* x)
{
  const OnigCodePoint sum = x[0] + x[1] + x[2];

  return (int )sum;
}
/* Compare/hash pair for tables keyed by three code points
 * (used when creating Unfold3Table). */
static struct st_hash_type type_code3_hash = {
code3_cmp,
code3_hash,
};
/* Lookup tables built by init_case_fold_table(). */
static st_table* FoldTable; /* fold-1, fold-2, fold-3 */
/* Keyed by a single code point (CaseUnfold_11 entries). */
static st_table* Unfold1Table;
/* Keyed by a two-code-point sequence (CaseUnfold_12 entries). */
static st_table* Unfold2Table;
/* Keyed by a three-code-point sequence (CaseUnfold_13 entries). */
static st_table* Unfold3Table;
/* Set to 1 once all four tables above have been populated. */
static int CaseFoldInited = 0;
/*
 * Build the four case-fold lookup tables from the static arrays in
 * casefold.h and set CaseFoldInited.
 * Returns 0 on success, or ONIGERR_MEMORY when a table cannot be
 * allocated (CaseFoldInited then stays 0).
 *
 * Every path leaves through the single exit below so that
 * THREAD_ATOMIC_END is always executed after THREAD_ATOMIC_START, even
 * when an allocation fails.
 */
static int init_case_fold_table(void)
{
  const CaseFold_11_Type   *p;
  const CaseUnfold_11_Type *p1;
  const CaseUnfold_12_Type *p2;
  const CaseUnfold_13_Type *p3;
  int i;
  int r = ONIGERR_MEMORY;	/* replaced by 0 only after full success */

  THREAD_ATOMIC_START;

  /* code point -> folded code point list */
  FoldTable = st_init_numtable_with_size(FOLD_TABLE_SIZE);
  if (ONIG_IS_NULL(FoldTable)) goto done;
  for (i = 0; i < numberof(CaseFold); i++) {
    p = &CaseFold[i];
    st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
  }
  for (i = 0; i < numberof(CaseFold_Locale); i++) {
    p = &CaseFold_Locale[i];
    st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
  }

  /* single code point -> unfolded code point list */
  Unfold1Table = st_init_numtable_with_size(UNFOLD1_TABLE_SIZE);
  if (ONIG_IS_NULL(Unfold1Table)) goto done;
  for (i = 0; i < numberof(CaseUnfold_11); i++) {
    p1 = &CaseUnfold_11[i];
    st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
  }
  for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) {
    p1 = &CaseUnfold_11_Locale[i];
    st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
  }

  /* two-code-point sequence -> unfolded code point list */
  Unfold2Table = st_init_table_with_size(&type_code2_hash, UNFOLD2_TABLE_SIZE);
  if (ONIG_IS_NULL(Unfold2Table)) goto done;
  for (i = 0; i < numberof(CaseUnfold_12); i++) {
    p2 = &CaseUnfold_12[i];
    st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
  }
  for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) {
    p2 = &CaseUnfold_12_Locale[i];
    st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
  }

  /* three-code-point sequence -> unfolded code point list */
  Unfold3Table = st_init_table_with_size(&type_code3_hash, UNFOLD3_TABLE_SIZE);
  if (ONIG_IS_NULL(Unfold3Table)) goto done;
  for (i = 0; i < numberof(CaseUnfold_13); i++) {
    p3 = &CaseUnfold_13[i];
    st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
  }

  CaseFoldInited = 1;
  r = 0;

 done:
  THREAD_ATOMIC_END;
  return r;
}
/*
 * Case-fold the character at *pp into fold, advance *pp past the source
 * character, and return the number of bytes written to fold.
 * A character with a FoldTable entry is replaced by its folded code
 * point(s), encoded with enc; any other character is copied unchanged.
 * With USE_UNICODE_CASE_FOLD_TURKISH_AZERI and the Turkish/Azeri flag
 * set, U+0049 folds to U+0131 and U+0130 folds to U+0069.
 */
extern int
onigenc_unicode_mbc_case_fold(OnigEncoding enc,
    OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
    UChar* fold)
{
  const UChar *src = *pp;
  CodePointList3 *folded;
  OnigCodePoint code;
  int src_len, k;

  /* Build the lookup tables on first use. */
  if (CaseFoldInited == 0) init_case_fold_table();

  code = ONIGENC_MBC_TO_CODE(enc, src, end);
  src_len = enclen(enc, src);
  *pp += src_len;

#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
    if (code == 0x0049)
      return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
    if (code == 0x0130)
      return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
  }
#endif

  if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&folded) != 0) {
    int total = 0;

    if (folded->n == 1)
      return ONIGENC_CODE_TO_MBC(enc, folded->code[0], fold);

    /* Multi-character fold: emit every code point back to back. */
    for (k = 0; k < folded->n; k++) {
      int w = ONIGENC_CODE_TO_MBC(enc, folded->code[k], fold);
      fold  += w;
      total += w;
    }
    return total;
  }

  /* No fold entry: copy the source bytes as they are. */
  for (k = 0; k < src_len; k++)
    fold[k] = src[k];
  return src_len;
}
/*
 * Walk the static case-unfold arrays and report every related pair to
 * the callback f as f(from, to_codes, to_len, arg).  Stops and returns
 * the callback's value as soon as it returns non-zero; returns 0 after a
 * complete walk.
 *
 * Single-code-point relations are always reported.  Relations whose
 * target is a sequence of two or three code points are reported only
 * when flag has INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set.  With
 * USE_UNICODE_CASE_FOLD_TURKISH_AZERI, the Turkish/Azeri flag replaces
 * the *_Locale arrays with the dotted/dotless I pairs.
 */
extern int
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
const CaseUnfold_11_Type* p11;
OnigCodePoint code;
int i, j, k, r;
/* if (CaseFoldInited == 0) init_case_fold_table(); */
/* 1:1 relations: report both directions between `from` and each `to`
 * entry, then both directions between every pair of `to` entries. */
for (i = 0; i < numberof(CaseUnfold_11); i++) {
p11 = &CaseUnfold_11[i];
for (j = 0; j < p11->to.n; j++) {
code = p11->from;
r = (*f)(p11->to.code[j], &code, 1, arg);
if (r != 0) return r;
code = p11->to.code[j];
r = (*f)(p11->from, &code, 1, arg);
if (r != 0) return r;
for (k = 0; k < j; k++) {
r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg);
if (r != 0) return r;
r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg);
if (r != 0) return r;
}
}
}
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
/* Turkish/Azeri: U+0049 <-> U+0131 and U+0069 <-> U+0130. */
code = 0x0131;
r = (*f)(0x0049, &code, 1, arg);
if (r != 0) return r;
code = 0x0049;
r = (*f)(0x0131, &code, 1, arg);
if (r != 0) return r;
code = 0x0130;
r = (*f)(0x0069, &code, 1, arg);
if (r != 0) return r;
code = 0x0069;
r = (*f)(0x0130, &code, 1, arg);
if (r != 0) return r;
}
else {
#endif
/* Locale-dependent 1:1 relations, reported the same way as above. */
for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) {
p11 = &CaseUnfold_11_Locale[i];
for (j = 0; j < p11->to.n; j++) {
code = p11->from;
r = (*f)(p11->to.code[j], &code, 1, arg);
if (r != 0) return r;
code = p11->to.code[j];
r = (*f)(p11->from, &code, 1, arg);
if (r != 0) return r;
for (k = 0; k < j; k++) {
r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]),
1, arg);
if (r != 0) return r;
r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]),
1, arg);
if (r != 0) return r;
}
}
}
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
}
#endif
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
/* Two-code-point sequences: each `to` entry is related to the 2-code
 * `from` sequence and to every other `to` entry. */
for (i = 0; i < numberof(CaseUnfold_12); i++) {
for (j = 0; j < CaseUnfold_12[i].to.n; j++) {
r = (*f)(CaseUnfold_12[i].to.code[j],
(OnigCodePoint* )CaseUnfold_12[i].from, 2, arg);
if (r != 0) return r;
for (k = 0; k < CaseUnfold_12[i].to.n; k++) {
if (k == j) continue;
r = (*f)(CaseUnfold_12[i].to.code[j],
(OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg);
if (r != 0) return r;
}
}
}
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
#endif
/* Locale-dependent two-code-point sequences (skipped for Turkish/Azeri). */
for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) {
for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) {
r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
(OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg);
if (r != 0) return r;
for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) {
if (k == j) continue;
r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
(OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]),
1, arg);
if (r != 0) return r;
}
}
}
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
}
#endif
/* Three-code-point sequences, same scheme as the two-code case. */
for (i = 0; i < numberof(CaseUnfold_13); i++) {
for (j = 0; j < CaseUnfold_13[i].to.n; j++) {
r = (*f)(CaseUnfold_13[i].to.code[j],
(OnigCodePoint* )CaseUnfold_13[i].from, 3, arg);
if (r != 0) return r;
for (k = 0; k < CaseUnfold_13[i].to.n; k++) {
if (k == j) continue;
r = (*f)(CaseUnfold_13[i].to.code[j],
(OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg);
if (r != 0) return r;
}
}
}
}
return 0;
}
/*
 * Collect into items[] the case-fold alternatives for the text that
 * starts at p and return how many items were written.  Each item records
 * the number of source bytes it stands for (byte_len) and one to three
 * replacement code points (code_len, code[]).
 *
 * Two kinds of alternatives are produced:
 *   - alternatives for the first character alone;
 *   - when INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set, single code
 *     points that unfold from the first two or three characters taken
 *     together.
 *
 * NOTE(review): the return value of init_case_fold_table() is ignored,
 * so the table lookups below run even if table creation failed.
 * NOTE(review): no capacity for items[] is passed in; the caller
 * presumably provides an array large enough for the worst case -- confirm.
 */
extern int
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[])
{
int n, i, j, k, len;
OnigCodePoint code, codes[3];
CodePointList3 *to, *z3;
CodePointList2 *z2;
if (CaseFoldInited == 0) init_case_fold_table();
n = 0;
code = ONIGENC_MBC_TO_CODE(enc, p, end);
len = enclen(enc, p);
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
/* Turkish/Azeri: the four dotted/dotless I code points each have exactly
 * one alternative, and nothing else is reported for them. */
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
if (code == 0x0049) {
items[0].byte_len = len;
items[0].code_len = 1;
items[0].code[0] = 0x0131;
return 1;
}
else if (code == 0x0130) {
items[0].byte_len = len;
items[0].code_len = 1;
items[0].code[0] = 0x0069;
return 1;
}
else if (code == 0x0131) {
items[0].byte_len = len;
items[0].code_len = 1;
items[0].code[0] = 0x0049;
return 1;
}
else if (code == 0x0069) {
items[0].byte_len = len;
items[0].code_len = 1;
items[0].code[0] = 0x0130;
return 1;
}
}
#endif
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
if (to->n == 1) {
/* The character folds to one code point: report the folded form and
 * every other code point that unfolds from it (excluding the input). */
OnigCodePoint orig_code = code;
items[0].byte_len = len;
items[0].code_len = 1;
items[0].code[0] = to->code[0];
n++;
/* From here on `code` holds the folded form. */
code = to->code[0];
if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
for (i = 0; i < to->n; i++) {
if (to->code[i] != orig_code) {
items[n].byte_len = len;
items[n].code_len = 1;
items[n].code[0] = to->code[i];
n++;
}
}
}
}
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
/* The character folds to a sequence of 2 or 3 code points.
 * cs[fn] lists the candidates for position fn: the folded code point
 * itself followed by whatever unfolds from it; ncs[fn] is the count. */
OnigCodePoint cs[3][4];
int fn, ncs[3];
for (fn = 0; fn < to->n; fn++) {
cs[fn][0] = to->code[fn];
if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0],
(void* )&z3) != 0) {
for (i = 0; i < z3->n; i++) {
cs[fn][i+1] = z3->code[i];
}
ncs[fn] = z3->n + 1;
}
else
ncs[fn] = 1;
}
if (fn == 2) {
/* Every combination of the two positions ... */
for (i = 0; i < ncs[0]; i++) {
for (j = 0; j < ncs[1]; j++) {
items[n].byte_len = len;
items[n].code_len = 2;
items[n].code[0] = cs[0][i];
items[n].code[1] = cs[1][j];
n++;
}
}
/* ... plus other single code points with the same two-code fold. */
if (onig_st_lookup(Unfold2Table, (st_data_t )to->code,
(void* )&z2) != 0) {
for (i = 0; i < z2->n; i++) {
if (z2->code[i] == code) continue;
items[n].byte_len = len;
items[n].code_len = 1;
items[n].code[0] = z2->code[i];
n++;
}
}
}
else {
/* Same as above for a three-code-point fold. */
for (i = 0; i < ncs[0]; i++) {
for (j = 0; j < ncs[1]; j++) {
for (k = 0; k < ncs[2]; k++) {
items[n].byte_len = len;
items[n].code_len = 3;
items[n].code[0] = cs[0][i];
items[n].code[1] = cs[1][j];
items[n].code[2] = cs[2][k];
n++;
}
}
}
if (onig_st_lookup(Unfold3Table, (st_data_t )to->code,
(void* )&z2) != 0) {
for (i = 0; i < z2->n; i++) {
if (z2->code[i] == code) continue;
items[n].byte_len = len;
items[n].code_len = 1;
items[n].code[0] = z2->code[i];
n++;
}
}
}
/* multi char folded code is not head of another folded multi char */
flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */
}
}
else {
/* No fold entry: the character is already in folded form, so report
 * everything that unfolds from it. */
if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
for (i = 0; i < to->n; i++) {
items[n].byte_len = len;
items[n].code_len = 1;
items[n].code[0] = to->code[i];
n++;
}
}
}
/* Look ahead: do the first two (then three) characters, each folded when
 * it has a single-code fold, form a sequence that unfolds to one code point?
 * byte_len of such items covers all characters consumed. */
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
p += len;
if (p < end) {
int clen;
codes[0] = code;
code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
&& to->n == 1) {
codes[1] = to->code[0];
}
else
codes[1] = code;
clen = enclen(enc, p);
len += clen;
if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
for (i = 0; i < z2->n; i++) {
items[n].byte_len = len;
items[n].code_len = 1;
items[n].code[0] = z2->code[i];
n++;
}
}
p += clen;
if (p < end) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
&& to->n == 1) {
codes[2] = to->code[0];
}
else
codes[2] = code;
clen = enclen(enc, p);
len += clen;
if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
(void* )&z2) != 0) {
for (i = 0; i < z2->n; i++) {
items[n].byte_len = len;
items[n].code_len = 1;
items[n].code[0] = z2->code[i];
n++;
}
}
}
}
}
return n;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

223
src/Onigmo/enc/utf16_be.c Normal file
View File

@ -0,0 +1,223 @@
/**********************************************************************
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Character length in bytes, indexed by the high-order byte of the first
 * 16-bit unit: 4 for 0xd8..0xdb, 2 for every other value. */
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/* Byte length of the character at p, decided by its first byte (the
 * high-order byte of the first 16-bit unit in big-endian order). */
static int
utf16be_mbc_enc_len(const UChar* p)
{
  const UChar lead = p[0];

  return EncLen_UTF16[lead];
}
/*
 * Return 1 when the 16-bit unit at p is a line terminator, 0 otherwise.
 * U+000A always counts.  With USE_UNICODE_ALL_LINE_TERMINATORS, U+000B,
 * U+000C, U+000D, U+0085, U+2028 and U+2029 count as well.
 * Fewer than two bytes before end never match.
 */
static int
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
{
  UChar hi, lo;

  if (! (p + 1 < end))
    return 0;

  hi = p[0];
  lo = p[1];

  if (lo == 0x0a && hi == 0x00)
    return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  if ((lo == 0x0b || lo == 0x0c || lo == 0x0d || lo == 0x85) && hi == 0x00)
    return 1;
  if (hi == 0x20 && (lo == 0x29 || lo == 0x28))
    return 1;
#endif
  return 0;
}
/*
 * Decode the big-endian UTF-16 character at p into a code point.
 * Reads two bytes, or four when the first byte marks a leading
 * surrogate; end is not consulted, so the caller must make sure those
 * bytes are readable.
 */
static OnigCodePoint
utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
  OnigCodePoint code;

  if (! UTF16_IS_SURROGATE_FIRST(*p)) {
    /* One 16-bit unit. */
    code = p[0] * 256 + p[1];
    return code;
  }

  /* Surrogate pair: plane number in bits 16 and up, then the middle and
   * low bytes reassembled from the two units. */
  code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
       + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
       + p[3];
  return code;
}
/* Number of bytes needed to encode code: 4 above U+FFFF, otherwise 2. */
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
  if (code > 0xffff)
    return 4;
  return 2;
}
/*
 * Encode code into buf as big-endian UTF-16 and return the number of
 * bytes written (4 for code points above U+FFFF, otherwise 2).
 */
static int
utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  unsigned int plane, mid;

  if (code <= 0xffff) {
    buf[0] = (UChar )((code & 0xff00) >> 8);
    buf[1] = (UChar )(code & 0xff);
    return 2;
  }

  /* Surrogate pair: spread (plane - 1) and the middle byte of the code
   * point over the two 16-bit units. */
  plane = (code >> 16) - 1;
  mid   = (code & 0xff00) >> 8;

  buf[0] = (plane >> 2) + 0xd8;
  buf[1] = ((plane & 0x03) << 6) + (mid >> 2);
  buf[2] = (mid & 0x03) + 0xdc;
  buf[3] = (UChar )(code & 0xff);
  return 4;
}
/*
 * Case-fold the character at *pp into fold, advance *pp, and return the
 * number of bytes written.  ASCII characters are handled here; all
 * others are handed to the generic Unicode folding routine.
 */
static int
utf16be_mbc_case_fold(OnigCaseFoldType flag,
		      const UChar** pp, const UChar* end, UChar* fold)
{
  const UChar* src = *pp;
  UChar low;

  if (! (ONIGENC_IS_ASCII_CODE(*(src+1)) && *src == 0))
    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag,
					 pp, end, fold);

  low = src[1];

#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  /* Turkish/Azeri: 'I' (U+0049) folds to U+0131. */
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && low == 0x49) {
    fold[0] = 0x01;
    fold[1] = 0x31;
    (*pp) += 2;
    return 2;
  }
#endif

  fold[0] = 0;
  fold[1] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(low);
  *pp += 2;
  return 2;
}
#if 0
/* Disabled code (never compiled): case-ambiguity test for characters in
 * the U+0000..U+00FF range.  It relies on
 * ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE, which is itself disabled.
 * NOTE(review): `(v | BIT_CTYPE_LOWER) != 0` is true whenever
 * BIT_CTYPE_LOWER is non-zero; `&` looks intended -- confirm before
 * re-enabling. */
static int
utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += EncLen_UTF16[*p];
if (*p == 0) {
int c, v;
p++;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
/*
 * Given a position s inside the buffer that begins at start, return the
 * address of the first byte of the character that contains s.
 */
static UChar*
utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *head = s;

  if (head <= start) return (UChar* )head;

  /* Align down to a 16-bit unit boundary relative to start. */
  if (((head - start) % 2) == 1)
    head -= 1;

  /* A trailing surrogate belongs to the unit before it. */
  if (UTF16_IS_SURROGATE_SECOND(*head) && head > start + 1)
    head -= 2;

  return (UChar* )head;
}
/* Case-fold alternatives for the text at p: the generic Unicode routine
 * bound to the UTF-16BE encoding. */
static int
utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  int count;

  count = onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE,
						     flag, p, end, items);
  return count;
}
/* Encoding descriptor for big-endian UTF-16: the utf16be_* functions
 * defined in this file plus the shared onigenc_* Unicode helpers. */
OnigEncodingType OnigEncodingUTF16_BE = {
utf16be_mbc_enc_len,
"UTF-16BE", /* name */
4, /* max byte length */
2, /* min byte length */
utf16be_is_mbc_newline,
utf16be_mbc_to_code,
utf16be_code_to_mbclen,
utf16be_code_to_mbc,
utf16be_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
utf16be_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf16be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

224
src/Onigmo/enc/utf16_le.c Normal file
View File

@ -0,0 +1,224 @@
/**********************************************************************
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Character length in bytes, indexed by the high-order byte of the first
 * 16-bit unit: 4 for 0xd8..0xdb, 2 for every other value. */
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/* Number of bytes needed to encode code: 4 above U+FFFF, otherwise 2. */
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
  if (code > 0xffff)
    return 4;
  return 2;
}
/* Byte length of the character at p, decided by its second byte (the
 * high-order byte of the first 16-bit unit in little-endian order). */
static int
utf16le_mbc_enc_len(const UChar* p)
{
  const UChar high = p[1];

  return EncLen_UTF16[high];
}
/*
 * Return 1 when the 16-bit unit at p is a line terminator, 0 otherwise.
 * U+000A always counts.  With USE_UNICODE_ALL_LINE_TERMINATORS, U+000B,
 * U+000C, U+000D, U+0085, U+2028 and U+2029 count as well.
 * Fewer than two bytes before end never match.
 */
static int
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
{
  UChar lo, hi;

  if (! (p + 1 < end))
    return 0;

  lo = p[0];
  hi = p[1];

  if (lo == 0x0a && hi == 0x00)
    return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  if ((lo == 0x0b || lo == 0x0c || lo == 0x0d || lo == 0x85) && hi == 0x00)
    return 1;
  if (hi == 0x20 && (lo == 0x29 || lo == 0x28))
    return 1;
#endif
  return 0;
}
/*
 * Decode the little-endian UTF-16 character at p into a code point.
 * Reads two bytes, or four when the second byte marks a leading
 * surrogate; end is not consulted, so the caller must make sure those
 * bytes are readable.
 */
static OnigCodePoint
utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
  const UChar lo = p[0];
  const UChar hi = p[1];
  OnigCodePoint code;

  if (! UTF16_IS_SURROGATE_FIRST(hi)) {
    /* One 16-bit unit. */
    code = hi * 256 + p[0];
    return code;
  }

  /* Surrogate pair: plane number in bits 16 and up, then the middle and
   * low bytes reassembled from the two units. */
  code = ((((hi - 0xd8) << 2) + ((lo & 0xc0) >> 6) + 1) << 16)
       + ((((lo & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
       + p[2];
  return code;
}
/*
 * Encode code into buf as little-endian UTF-16 and return the number of
 * bytes written (4 for code points above U+FFFF, otherwise 2).
 */
static int
utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  unsigned int plane, mid;

  if (code <= 0xffff) {
    buf[0] = (UChar )(code & 0xff);
    buf[1] = (UChar )((code & 0xff00) >> 8);
    return 2;
  }

  /* Surrogate pair: each 16-bit unit is stored low byte first. */
  plane = (code >> 16) - 1;
  mid   = (code & 0xff00) >> 8;

  buf[0] = ((plane & 0x03) << 6) + (mid >> 2);
  buf[1] = (plane >> 2) + 0xd8;
  buf[2] = (UChar )(code & 0xff);
  buf[3] = (mid & 0x03) + 0xdc;
  return 4;
}
/*
 * Case-fold the character at *pp into fold, advance *pp, and return the
 * number of bytes written.  ASCII characters are handled here; all
 * others are handed to the generic Unicode folding routine.
 */
static int
utf16le_mbc_case_fold(OnigCaseFoldType flag,
		      const UChar** pp, const UChar* end, UChar* fold)
{
  const UChar* src = *pp;
  UChar low;

  if (! (ONIGENC_IS_ASCII_CODE(*src) && *(src+1) == 0))
    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,
					 fold);

  low = src[0];

#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  /* Turkish/Azeri: 'I' (U+0049) folds to U+0131. */
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && low == 0x49) {
    fold[0] = 0x31;
    fold[1] = 0x01;
    (*pp) += 2;
    return 2;
  }
#endif

  fold[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(low);
  fold[1] = 0;
  *pp += 2;
  return 2;
}
#if 0
/* Compiled out by the surrounding #if 0; kept for reference only.
 * Advances *pp past the character at *pp and reports (TRUE/FALSE) whether
 * that character is case-ambiguous.  Only characters whose high byte is 0
 * are examined; all others yield FALSE. */
static int
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
const UChar* end)
{
const UChar* p = *pp;
(*pp) += EncLen_UTF16[*(p+1)];
if (*(p+1) == 0) {
int c, v;
/* 0xdf is ambiguous only when multi-character folding is requested */
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
/* NOTE(review): `v | BIT_CTYPE_LOWER` is non-zero whenever BIT_CTYPE_LOWER
 * is non-zero, so this branch looks unconditional; `&` may have been
 * intended -- confirm before re-enabling this code. */
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
/* Moves `s` back to the first byte of the character that contains it.
 * An odd offset from `start` is first rounded down to a 2-byte unit
 * boundary; if that unit is a trailing surrogate and there is room before
 * it, the result steps back one more unit to the leading surrogate.
 * Returns `s` unchanged when s <= start. */
static UChar*
utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar* head = s;

  if (head <= start)
    return (UChar* )head;

  if ((head - start) % 2 == 1)
    head -= 1;

  if (UTF16_IS_SURROGATE_SECOND(*(head+1)) && head > start + 1)
    head -= 2;

  return (UChar* )head;
}
/* Thin adapter: forwards to onigenc_unicode_get_case_fold_codes_by_str()
 * with the UTF-16LE encoding and returns its result unchanged. */
static int
utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  int n;

  n = onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,
                                                 flag, p, end, items);
  return n;
}
/* Encoding descriptor for UTF-16LE.  It combines the utf16le_* handlers
 * defined in this file with the shared onigenc_* Unicode handlers.
 * NOTE(review): the meaning of each slot is fixed by the OnigEncodingType
 * declaration, which is not visible here -- confirm slot order there before
 * adding or reordering entries. */
OnigEncodingType OnigEncodingUTF16_LE = {
utf16le_mbc_enc_len,
"UTF-16LE", /* name */
4, /* max byte length */
2, /* min byte length */
utf16le_is_mbc_newline,
utf16le_mbc_to_code,
utf16le_code_to_mbclen,
utf16le_code_to_mbc,
utf16le_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
utf16le_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf16le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

181
src/Onigmo/enc/utf32_be.c Normal file
View File

@ -0,0 +1,181 @@
/**********************************************************************
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Every UTF-32 character occupies exactly 4 bytes, so `p` is not inspected. */
static int
utf32be_mbc_enc_len(const UChar* p ARG_UNUSED)
{
  const int nbytes = 4;

  return nbytes;
}
/* Returns 1 when the 4-byte unit at `p` is a line terminator, 0 otherwise.
 * At least four bytes must be available before `end`.
 * U+000A always counts; with USE_UNICODE_ALL_LINE_TERMINATORS defined,
 * U+000B, U+000C, U+000D, U+0085, U+2028 and U+2029 count as well. */
static int
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (p + 3 >= end)
    return 0;

  /* the two high-order bytes are zero for every terminator handled here */
  if (p[0] != 0 || p[1] != 0)
    return 0;

  if (p[2] == 0) {
    if (p[3] == 0x0a)
      return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
    if (p[3] == 0x0b || p[3] == 0x0c || p[3] == 0x0d || p[3] == 0x85)
      return 1;
#endif
    return 0;
  }
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  if (p[2] == 0x20 && (p[3] == 0x29 || p[3] == 0x28))
    return 1;
#endif
  return 0;
}
/* Decodes the big-endian 4-byte unit at `p` into a code point.
 * `end` is not consulted; the caller must guarantee 4 readable bytes.
 *
 * Each byte is widened to OnigCodePoint before shifting.  The previous
 * form multiplied promoted (signed) ints, which overflows -- undefined
 * behaviour -- whenever p[0] >= 0x80, as can happen on malformed input.
 * For all inputs where the old expression was defined the result is the
 * same. */
static OnigCodePoint
utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
  return ((OnigCodePoint )p[0] << 24)
       | ((OnigCodePoint )p[1] << 16)
       | ((OnigCodePoint )p[2] <<  8)
       |  (OnigCodePoint )p[3];
}
/* Every code point is stored in 4 bytes in UTF-32, so `code` is not
 * inspected. */
static int
utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
{
  const int nbytes = 4;

  return nbytes;
}
/* Writes `code` to `buf` as 4 bytes, most significant byte first, and
 * returns the number of bytes written (always 4). */
static int
utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  buf[0] = (UChar )((code & 0xff000000) >> 24);
  buf[1] = (UChar )((code & 0xff0000) >> 16);
  buf[2] = (UChar )((code & 0xff00) >> 8);
  buf[3] = (UChar ) (code & 0xff);
  return 4;
}
/* Case-folds the character at *pp into `fold`, advances *pp past it and
 * returns the number of bytes written.
 * ASCII characters (three leading zero bytes) are handled here: they are
 * lower-cased, except that with USE_UNICODE_CASE_FOLD_TURKISH_AZERI
 * compiled in and ONIGENC_CASE_FOLD_TURKISH_AZERI set, 'I' (0x49) folds to
 * U+0131.  Everything else is delegated to onigenc_unicode_mbc_case_fold(). */
static int
utf32be_mbc_case_fold(OnigCaseFoldType flag,
                      const UChar** pp, const UChar* end, UChar* fold)
{
  const UChar* src = *pp;

  if (!(ONIGENC_IS_ASCII_CODE(*(src+3))
        && *(src+2) == 0 && *(src+1) == 0 && *src == 0))
    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end,
                                         fold);

  fold[0] = 0;
  fold[1] = 0;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && *(src+3) == 0x49) {
    fold[2] = 0x01;
    fold[3] = 0x31;
    *pp = src + 4;
    return 4;
  }
#endif
  fold[2] = 0;
  fold[3] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(src+3));
  *pp = src + 4;
  return 4;
}
#if 0
/* Compiled out by the surrounding #if 0; kept for reference only.
 * Advances *pp by 4 and reports (TRUE/FALSE) whether the character that was
 * at *pp is case-ambiguous.  Only characters whose first three bytes are 0
 * are examined; all others yield FALSE. */
static int
utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += 4;
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
int c, v;
/* step to the low-order byte, the only one that can be non-zero here */
p += 3;
/* 0xdf is ambiguous only when multi-character folding is requested */
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
/* NOTE(review): `v | BIT_CTYPE_LOWER` is non-zero whenever BIT_CTYPE_LOWER
 * is non-zero, so this branch looks unconditional; `&` may have been
 * intended -- confirm before re-enabling this code. */
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
/* Moves `s` back to the nearest 4-byte boundary counted from `start`,
 * i.e. to the first byte of the character containing `s`.
 * Returns `s` unchanged when s <= start. */
static UChar*
utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
{
  if (s <= start)
    return (UChar* )s;

  return (UChar* )(s - (int )((s - start) % 4));
}
/* Thin adapter: forwards to onigenc_unicode_get_case_fold_codes_by_str()
 * with the UTF-32BE encoding and returns its result unchanged. */
static int
utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  int n;

  n = onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE,
                                                 flag, p, end, items);
  return n;
}
/* Encoding descriptor for UTF-32BE.  It combines the utf32be_* handlers
 * defined in this file with the shared onigenc_* Unicode handlers.
 * NOTE(review): the meaning of each slot is fixed by the OnigEncodingType
 * declaration, which is not visible here -- confirm slot order there before
 * adding or reordering entries. */
OnigEncodingType OnigEncodingUTF32_BE = {
utf32be_mbc_enc_len,
"UTF-32BE", /* name */
4, /* max byte length */
4, /* min byte length */
utf32be_is_mbc_newline,
utf32be_mbc_to_code,
utf32be_code_to_mbclen,
utf32be_code_to_mbc,
utf32be_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
utf32be_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf32be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

181
src/Onigmo/enc/utf32_le.c Normal file
View File

@ -0,0 +1,181 @@
/**********************************************************************
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Every UTF-32 character occupies exactly 4 bytes, so `p` is not inspected. */
static int
utf32le_mbc_enc_len(const UChar* p ARG_UNUSED)
{
  const int nbytes = 4;

  return nbytes;
}
/* Returns 1 when the 4-byte unit at `p` is a line terminator, 0 otherwise.
 * At least four bytes must be available before `end`.
 * U+000A always counts; with USE_UNICODE_ALL_LINE_TERMINATORS defined,
 * U+000B, U+000C, U+000D, U+0085, U+2028 and U+2029 count as well.
 *
 * Fix: the U+000B/000C/000D/0085 test used `(p+2) == 0x00`, comparing the
 * pointer itself with null instead of the byte it points to, so those
 * terminators were never recognised.  It now dereferences the pointer. */
static int
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (p + 3 < end) {
    if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
      return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
    if ((*p == 0x0b || *p == 0x0c || *p == 0x0d || *p == 0x85)
        && *(p+1) == 0x00 && *(p+2) == 0x00 && *(p+3) == 0x00)
      return 1;
    if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
        && *(p+2) == 0x00 && *(p+3) == 0x00)
      return 1;
#endif
  }
  return 0;
}
/* Decodes the little-endian 4-byte unit at `p` into a code point.
 * `end` is not consulted; the caller must guarantee 4 readable bytes.
 *
 * Each byte is widened to OnigCodePoint before shifting.  The previous
 * form multiplied promoted (signed) ints, which overflows -- undefined
 * behaviour -- whenever p[3] >= 0x80, as can happen on malformed input.
 * For all inputs where the old expression was defined the result is the
 * same. */
static OnigCodePoint
utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
  return ((OnigCodePoint )p[3] << 24)
       | ((OnigCodePoint )p[2] << 16)
       | ((OnigCodePoint )p[1] <<  8)
       |  (OnigCodePoint )p[0];
}
/* Every code point is stored in 4 bytes in UTF-32, so `code` is not
 * inspected. */
static int
utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
{
  const int nbytes = 4;

  return nbytes;
}
/* Writes `code` to `buf` as 4 bytes, least significant byte first, and
 * returns the number of bytes written (always 4). */
static int
utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  buf[0] = (UChar ) (code & 0xff);
  buf[1] = (UChar )((code & 0xff00) >> 8);
  buf[2] = (UChar )((code & 0xff0000) >> 16);
  buf[3] = (UChar )((code & 0xff000000) >> 24);
  return 4;
}
/* Case-folds the character at *pp into `fold`, advances *pp past it and
 * returns the number of bytes written.
 * ASCII characters (three trailing zero bytes) are handled here: they are
 * lower-cased, except that with USE_UNICODE_CASE_FOLD_TURKISH_AZERI
 * compiled in and ONIGENC_CASE_FOLD_TURKISH_AZERI set, 'I' (0x49) folds to
 * U+0131.  Everything else is delegated to onigenc_unicode_mbc_case_fold().
 *
 * Fix: previously, when the Turkish/Azeri flag was set and the character
 * was ASCII but not 'I', neither branch wrote the low two bytes.  The
 * result came out as two zero bytes followed by two bytes that were never
 * written.  Such characters now take the ordinary lower-casing path, as
 * they do in the UTF-16LE and UTF-32BE folders. */
static int
utf32le_mbc_case_fold(OnigCaseFoldType flag,
                      const UChar** pp, const UChar* end, UChar* fold)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && *p == 0x49) {
      /* U+0131, low byte first */
      *fold++ = 0x31;
      *fold++ = 0x01;
    }
    else
#endif
    {
      *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
      *fold++ = 0;
    }
    *fold++ = 0;
    *fold   = 0;
    *pp += 4;
    return 4;
  }
  else
    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end,
                                         fold);
}
#if 0
/* Compiled out by the surrounding #if 0; kept for reference only.
 * Advances *pp by 4 and reports (TRUE/FALSE) whether the character that was
 * at *pp is case-ambiguous.  Only characters whose last three bytes are 0
 * are examined; all others yield FALSE. */
static int
utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += 4;
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
int c, v;
/* 0xdf is ambiguous only when multi-character folding is requested */
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
/* NOTE(review): `v | BIT_CTYPE_LOWER` is non-zero whenever BIT_CTYPE_LOWER
 * is non-zero, so this branch looks unconditional; `&` may have been
 * intended -- confirm before re-enabling this code. */
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
/* Moves `s` back to the nearest 4-byte boundary counted from `start`,
 * i.e. to the first byte of the character containing `s`.
 * Returns `s` unchanged when s <= start. */
static UChar*
utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
{
  if (s <= start)
    return (UChar* )s;

  return (UChar* )(s - (int )((s - start) % 4));
}
/* Thin adapter: forwards to onigenc_unicode_get_case_fold_codes_by_str()
 * with the UTF-32LE encoding and returns its result unchanged. */
static int
utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  int n;

  n = onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE,
                                                 flag, p, end, items);
  return n;
}
/* Encoding descriptor for UTF-32LE.  It combines the utf32le_* handlers
 * defined in this file with the shared onigenc_* Unicode handlers.
 * NOTE(review): the meaning of each slot is fixed by the OnigEncodingType
 * declaration, which is not visible here -- confirm slot order there before
 * adding or reordering entries. */
OnigEncodingType OnigEncodingUTF32_LE = {
utf32le_mbc_enc_len,
"UTF-32LE", /* name */
4, /* max byte length */
4, /* min byte length */
utf32le_is_mbc_newline,
utf32le_mbc_to_code,
utf32le_code_to_mbclen,
utf32le_code_to_mbc,
utf32le_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
utf32le_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf32le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

304
src/Onigmo/enc/utf8.c Normal file
View File

@ -0,0 +1,304 @@
/**********************************************************************
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Enables the mapping of the bytes 0xfe and 0xff, which never start a
 * multi-byte sequence here, to dedicated virtual code points. */
#define USE_INVALID_CODE_SCHEME
#ifdef USE_INVALID_CODE_SCHEME
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
#define INVALID_CODE_FE 0xfffffffe
#define INVALID_CODE_FF 0xffffffff
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the largest code value that is not one of the virtual ones -- confirm. */
#define VALID_CODE_LIMIT 0x7fffffff
#endif
/* Non-zero when byte c is not of the form 10xxxxxx, i.e. it is not a
 * continuation byte and can therefore start a character. */
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
/* Sequence length in bytes, indexed by the first byte of a character:
 *   0x00-0xbf -> 1   (continuation bytes 0x80-0xbf are also given length 1)
 *   0xc0-0xdf -> 2
 *   0xe0-0xef -> 3
 *   0xf0-0xf7 -> 4
 *   0xf8-0xfb -> 5
 *   0xfc-0xfd -> 6
 *   0xfe-0xff -> 1
 * Each row below covers 16 consecutive byte values. */
static const int EncLen_UTF8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};
/* Byte length of the character starting at `p`, taken from EncLen_UTF8 by
 * its first byte. */
static int
mbc_enc_len(const UChar* p)
{
  const UChar first = p[0];

  return EncLen_UTF8[first];
}
/* Returns 1 when a line terminator starts at `p` (and fits before `end`),
 * 0 otherwise.  0x0a always counts; with USE_UNICODE_ALL_LINE_TERMINATORS
 * defined, 0x0b, 0x0c, 0x0d and the encoded forms of U+0085, U+2028 and
 * U+2029 count as well. */
static int
is_mbc_newline(const UChar* p, const UChar* end)
{
  if (p >= end)
    return 0;

  switch (*p) {
  case 0x0a:
    return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  case 0x0b:
  case 0x0c:
  case 0x0d:
    return 1;
  case 0xc2: /* U+0085 */
    return (p + 1 < end && *(p+1) == 0x85) ? 1 : 0;
  case 0xe2: /* U+2028, U+2029 */
    return (p + 2 < end && *(p+1) == 0x80
            && (*(p+2) == 0xa8 || *(p+2) == 0xa9)) ? 1 : 0;
#endif
  default:
    return 0;
  }
}
/* Decodes the character starting at `p` into a code point.
 * The sequence length comes from mbc_enc_len(); for multi-byte sequences
 * the payload bits of the first byte are followed by the low six bits of
 * each subsequent byte.  For single-byte sequences the byte value itself
 * is returned, except that with USE_INVALID_CODE_SCHEME the bytes 0xfe and
 * 0xff map to INVALID_CODE_FE and INVALID_CODE_FF.
 * `end` is not consulted; the caller must guarantee the bytes exist. */
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
  const int len = mbc_enc_len(p);
  const int first = p[0];
  OnigCodePoint code;
  int i;

  if (len <= 1) {
#ifdef USE_INVALID_CODE_SCHEME
    if (first > 0xfd)
      return ((first == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
#endif
    return (OnigCodePoint )first;
  }

  /* a sequence of `len` bytes keeps (7 - len) payload bits in its first byte */
  code = first & ((1 << (7 - len)) - 1);
  for (i = 1; i < len; i++)
    code = (code << 6) | (p[i] & ((1 << 6) - 1));

  return code;
}
/* Number of bytes needed to encode `code`: 1 to 6 depending on the highest
 * bit set.  With USE_INVALID_CODE_SCHEME the two virtual code points
 * INVALID_CODE_FE / INVALID_CODE_FF take 1 byte.  Any other value yields
 * ONIGERR_INVALID_CODE_POINT_VALUE. */
static int
code_to_mbclen(OnigCodePoint code)
{
  /* entry i holds the bits that must all be clear for a length of i + 1 */
  static const OnigCodePoint high_bits[] = {
    0xffffff80, 0xfffff800, 0xffff0000,
    0xffe00000, 0xfc000000, 0x80000000
  };
  int i;

  for (i = 0; i < 6; i++) {
    if ((code & high_bits[i]) == 0)
      return i + 1;
  }

#ifdef USE_INVALID_CODE_SCHEME
  if (code == INVALID_CODE_FE || code == INVALID_CODE_FF)
    return 1;
#endif
  return ONIGERR_INVALID_CODE_POINT_VALUE;
}
/* Encodes `code` into `buf` and returns the number of bytes written (1-6).
 * With USE_INVALID_CODE_SCHEME the virtual code points INVALID_CODE_FE and
 * INVALID_CODE_FF are written back as the single bytes 0xfe and 0xff.
 * Any other value with the top bit set yields
 * ONIGERR_TOO_BIG_WIDE_CHAR_VALUE. */
static int
code_to_mbc(OnigCodePoint code, UChar *buf)
{
#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
  if ((code & 0xffffff80) == 0) {
    buf[0] = (UChar )code;
    return 1;
  }
  if ((code & 0xfffff800) == 0) {
    buf[0] = (UChar )(((code>>6)& 0x1f) | 0xc0);
    buf[1] = UTF8_TRAIL0(code);
    return 2;
  }
  if ((code & 0xffff0000) == 0) {
    buf[0] = (UChar )(((code>>12) & 0x0f) | 0xe0);
    buf[1] = UTF8_TRAILS(code, 6);
    buf[2] = UTF8_TRAIL0(code);
    return 3;
  }
  if ((code & 0xffe00000) == 0) {
    buf[0] = (UChar )(((code>>18) & 0x07) | 0xf0);
    buf[1] = UTF8_TRAILS(code, 12);
    buf[2] = UTF8_TRAILS(code, 6);
    buf[3] = UTF8_TRAIL0(code);
    return 4;
  }
  if ((code & 0xfc000000) == 0) {
    buf[0] = (UChar )(((code>>24) & 0x03) | 0xf8);
    buf[1] = UTF8_TRAILS(code, 18);
    buf[2] = UTF8_TRAILS(code, 12);
    buf[3] = UTF8_TRAILS(code, 6);
    buf[4] = UTF8_TRAIL0(code);
    return 5;
  }
  if ((code & 0x80000000) == 0) {
    buf[0] = (UChar )(((code>>30) & 0x01) | 0xfc);
    buf[1] = UTF8_TRAILS(code, 24);
    buf[2] = UTF8_TRAILS(code, 18);
    buf[3] = UTF8_TRAILS(code, 12);
    buf[4] = UTF8_TRAILS(code, 6);
    buf[5] = UTF8_TRAIL0(code);
    return 6;
  }
#ifdef USE_INVALID_CODE_SCHEME
  if (code == INVALID_CODE_FE) {
    buf[0] = 0xfe;
    return 1;
  }
  if (code == INVALID_CODE_FF) {
    buf[0] = 0xff;
    return 1;
  }
#endif
  return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
/* Case-folds the character at *pp into `fold`, advances *pp past it and
 * returns the number of bytes written.
 * ASCII bytes are handled here: they are lower-cased (1 byte), except that
 * with USE_UNICODE_CASE_FOLD_TURKISH_AZERI compiled in and
 * ONIGENC_CASE_FOLD_TURKISH_AZERI set, 'I' (0x49) folds to the two bytes
 * 0xc4 0xb1.  Everything else is delegated to
 * onigenc_unicode_mbc_case_fold(). */
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
              const UChar* end, UChar* fold)
{
  const UChar* src = *pp;

  if (!ONIGENC_IS_MBC_ASCII(src))
    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag,
                                         pp, end, fold);

#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && *src == 0x49) {
    fold[0] = 0xc4;
    fold[1] = 0xb1;
    *pp = src + 1;
    return 2;
  }
#endif

  fold[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
  *pp = src + 1;
  return 1; /* byte length of the lower-cased character */
}
#if 0
/* Compiled out by the surrounding #if 0; kept for reference only.
 * Advances *pp past the character at *pp and reports whether that character
 * is case-ambiguous.  ASCII is answered by ONIGENC_IS_ASCII_CODE_CASE_AMBIG;
 * among multi-byte sequences only those starting with 0xc3 are examined. */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
(*pp)++;
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
else {
(*pp) += mbc_enc_len(p);
if (*p == 0xc3) {
int c = *(p + 1);
if (c >= 0x80) {
if (c <= (UChar )0x9e) { /* upper */
if (c == (UChar )0x97) return FALSE;
return TRUE;
}
else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */
/* '\267' is octal for 0xb7 */
if (c == (UChar )'\267') return FALSE;
return TRUE;
}
/* second byte 0x9f is ambiguous only with multi-character folding */
else if (c == (UChar )0x9f &&
(flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
}
}
}
return FALSE;
}
#endif
/* Stores 0x80 in *sb_out, then returns whatever
 * onigenc_unicode_ctype_code_range() reports for `ctype`, which also
 * receives `ranges`. */
static int
get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
                     const OnigCodePoint* ranges[])
{
  int r;

  *sb_out = 0x80;
  r = onigenc_unicode_ctype_code_range(ctype, ranges);
  return r;
}
/* Walks backwards from `s` over continuation bytes (10xxxxxx) until a lead
 * byte or `start` is reached, and returns that position.
 * Returns `s` unchanged when s <= start. */
static UChar*
left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *head;

  if (s <= start)
    return (UChar* )s;

  for (head = s; !utf8_islead(*head) && head > start; head--)
    ;

  return (UChar* )head;
}
/* Thin adapter: forwards to onigenc_unicode_get_case_fold_codes_by_str()
 * with the UTF-8 encoding and returns its result unchanged. */
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  int n;

  n = onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8,
                                                 flag, p, end, items);
  return n;
}
/* Encoding descriptor for UTF-8.  It combines the handlers defined in this
 * file with the shared onigenc_* Unicode handlers.
 * NOTE(review): the meaning of each slot is fixed by the OnigEncodingType
 * declaration, which is not visible here -- confirm slot order there before
 * adding or reordering entries. */
OnigEncodingType OnigEncodingUTF8 = {
mbc_enc_len,
"UTF-8", /* name */
6, /* max byte length */
1, /* min byte length */
is_mbc_newline,
mbc_to_code,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

191
src/Onigmo/index.html Normal file
View File

@ -0,0 +1,191 @@
<html>
<head>
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis">
<title>Oniguruma</title>
</head>
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
<p>
(c) K.Kosako, updated at: 2013/04/04
</p>
<dl>
<font color="orange">
<dt><b>What's new</b>
</font>
<ul>
<li>2013/04/04: Version 5.9.4 released.</li>
<li>2007/08/16: Version 4.7.1 released.</li>
<li>2007/06/20: Version 2.5.9 released.</li>
<li>2007/06/20: Maintainer of 2.x was changed.</li>
</ul>
</dl>
<hr>
<p>
Oniguruma is a regular expressions library.<br>
The characteristics of this library is that different character encoding
<br>for every regular expression object can be specified.
<br>(supported APIs: GNU regex, POSIX and Oniguruma native)
</p>
<dl>
<dt><b>Supported character encodings:</b><br>
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br>
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
<font color="orange">
(GB18030 encoding was contributed by KUBO Takehiro)<br>
(CP1251 encoding was contributed by Byte)
</font>
</p>
</dl>
<hr>
<dt><b>License:</b> BSD license.
<dl>
<dt><b>Platform:</b>
<ul>
<li> Unix (include Mac OS X)
<li> Cygwin
<li> Win32
</ul>
<br>
<dt><b>Download:</b>
<ul>
<li> <a href="archive/onig-5.9.4.tar.gz">Latest release version 5.9.4</a> (2013/04/04) <a href="HISTORY_5X.txt">Change Log</a>
<li> <a href="archive/onig-5.9.3.tar.gz">5.9.3</a> (2012/10/26)
<li> <a href="archive/onig-4.7.1.tar.gz">Latest release version 4.7.1</a> (2007/08/16) <a href="HISTORY_4X.txt">Change Log</a>
<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
<li> <a href="archive/onigd2_5_9.tar.gz">Latest release version 2.5.9</a> (2007/06/20) <a href="HISTORY_2X.txt">Change Log</a>
</ul>
<br>
<font color="red">
Maintainer of 2.x was changed to Hannes Wyss &lt;hwyss AT ywesee.com&gt;.<br>
About 2.x, please contact him.<br>
</font>
* 5.x supports Unicode Property/Script.<br>
* 2.x supports Ruby1.6/1.8.<br>
<br>
<dt><b>Documents:</b> (version 5.9.4)
<ul>
<li> <a href="doc/RE.txt">Regular Expressions</a>
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
<li> <a href="doc/API.txt">Oniguruma API</a>
<a href="doc/API.ja.txt">(Japanese: EUC-JP)</a>
</ul>
<br>
<dt><b>Sample Programs:</b>
<ul>
<li><a href="sample/simple.c">example of the minimum</a>
<li><a href="sample/sql.c">example of the variable syntax and meta character (SQL-like pattern match)</a>
</ul>
<br>
<dt><b>Site Links:</b>
<ul>
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna &gt; Lib &gt; Oniguruma</a> (Japanese page)
</ul>
<br>
<dt><b>Links:</b>
<ul>
<li> <a href="http://www.perzl.org/aix/index.php?n=Main.Oniguruma">AIX Open Source Packages</a>
<li> <a href="https://aur.archlinux.org/packages/oniguruma/">Arch Linux Package</a>
<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (Japanese page)
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page)
<li> <a href="http://limechat.net/cocoaoniguruma/">CocoaOniguruma</a>
<li> <a href="http://kmaebashi.com/">crowbar</a> (Japanese page)
<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a>
<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page)
<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page)
<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (Japanese page)
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (Japanese page)
<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (Japanese page)
<li> <a href="http://limechat.net/">LimeChat</a> (Japanese page)
<li> <a href="http://medb.enhiro.com/">meDB</a> (Japanese page)
<li> <a href="http://monaos.org/">Mona OS</a>
<li> <a href="http://mongoose.jp/">mongoose</a> (Japanese page)
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
<li> <a href="http://sonoisa.github.com/ogrekit/About_%28English%29.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page)
<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a>
<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page)
<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding
<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page)
<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page)
<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (Japanese page)
<li> <a href="http://search.cpan.org/~andya/re-engine-Oniguruma">re-engine-Oniguruma</a>
<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page)
<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page)
<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page)
<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (Japanese page)
<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a>
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
<li> <a href="https://code.google.com/p/oniguruma-visualworks/">oniguruma-visualworks</a>
<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (Japanese page)
<li> <a href="http://www.hi-ho.ne.jp/kuze/tool.htm">Zed (Win32)</a> (Japanese page)
</ul>
<br>
<dt><b>References:</b>
<ul>
<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page)
<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a>
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
</ul>
<br>
</dl>
<p>
and I'm thankful to Akinori MUSHA.
</p>
<hr>
<dl>
<dt><b>Other Libraries:</b>
<ul>
<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a>
<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a>
<li> <a href="http://www.pcre.org/">PCRE</a>
<li> <a href="http://re2c.org/">re2c</a>
<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
<li> <a href="http://laurikari.net/tre/">TRE</a>
<li> <a href="http://svn.codehaus.org/jruby/joni/">Joni (Java)</a>
<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
<li> <a href="http://sourceforge.jp/projects/onig4j/">Oniguruma for Java</a>
</ul>
</dl>
</body>
</html>

194
src/Onigmo/index_ja.html Normal file
View File

@ -0,0 +1,194 @@
<html>
<head>
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=utf-8">
<title>鬼車</title>
</head>
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
<h1>鬼車</h1>
<p>
(c) K.Kosako, 最終更新: 2013/04/04
</p>
<dl>
<font color="orange">
<dt><b>更新情報</b>
</font>
<ul>
<li>2013/04/04: Version 5.9.4 リリース</li>
<li>2007/08/16: Version 4.7.1 リリース</li>
<li>2007/06/20: Version 2.5.9 リリース</li>
<li>2007/06/20: 2.xの保守担当者を変更</li>
</ul>
</dl>
<hr>
<p>
鬼車は正規表現ライブラリである。<br>
このライブラリの特徴は、それぞれの正規表現オブジェクトごとに異なる文字エンコーディングを
指定できること。<br>
(API: GNU regex, POSIX and Oniguruma native)
</p>
<dl>
<dt><b>対応している文字エンコーディング:</b><br>
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br>
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
<font color="orange">
(GB18030は、KUBO Takehiro氏提供)<br>
(CP1251は、Byte氏提供)
</font>
</p>
</dl>
<hr>
<dt><b>ライセンス:</b>BSDライセンス
<dl>
<dt><b>プラットフォーム:</b>
<ul>
<li> Unix (Mac OS Xを含む)
<li> Cygwin
<li> Win32
</ul>
<br>
<dt><b>ダウンロード:</b>
<ul>
<li> <a href="archive/onig-5.9.4.tar.gz">5.9.4 最新版</a> (2013/04/04) <a href="HISTORY_5X.txt">更新履歴</a>
<li> <a href="archive/onig-5.9.3.tar.gz">5.9.3</a> (2012/10/26)
<li> <a href="archive/onig-4.7.1.tar.gz">4.7.1 最新版</a> (2007/08/16) <a href="HISTORY_4X.txt">更新履歴</a>
<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
<li> <a href="archive/onigd2_5_9.tar.gz">2.5.9 最新版</a> (2007/06/20) <a href="HISTORY_2X.txt">更新履歴</a>
</ul>
<br>
<font color="red">
2.xの保守担当は、Hannes Wyss &lt;hwyss AT ywesee.com&gt;に交替しました。<br>
2.xについては、彼に連絡してください。<br>
</font>
* 5.xはUnicode Property/Scriptを提供<br>
* 2.xはRuby1.6/1.8組込みライブラリとして動作する。 (2006年末で保守を終了)<br>
<br>
<dt><b>ドキュメント:</b> (version 5.9.4)
<ul>
<li> <a href="doc/RE.txt">正規表現</a>
<a href="doc/RE.ja.txt">(日本語: EUC-JP)</a>
<li> <a href="doc/API.txt">鬼車API</a>
<a href="doc/API.ja.txt">(日本語: EUC-JP)</a>
</ul>
<br>
<dt><b>サンプルプログラム:</b>
<ul>
<li><a href="sample/simple.c">最小使用例</a>
<li><a href="sample/sql.c">可変文法と可変メタ文字機能使用例(SQL-like pattern match)</a>
</ul>
<br>
<dt><b>サイト:</b>
<ul>
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna &gt; Lib &gt; Oniguruma</a> (日本語)
</ul>
<br>
<dt><b>リンク:</b>
<ul>
<li> <a href="http://www.perzl.org/aix/index.php?n=Main.Oniguruma">AIX Open Source Packages</a>
<li> <a href="https://aur.archlinux.org/packages/oniguruma/">Arch Linux Package</a>
<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (日本語)
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (日本語)
<li> <a href="http://limechat.net/cocoaoniguruma/index_ja.html">CocoaOniguruma</a> (日本語)
<li> <a href="http://kmaebashi.com/">crowbar</a> (日本語)
<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a>
<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (日本語)
<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (日本語)
<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (日本語)
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (日本語)
<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (日本語)
<li> <a href="http://limechat.net/">LimeChat</a> (日本語)
<li> <a href="http://medb.enhiro.com/">meDB</a> (日本語)
<li> <a href="http://monaos.org/">Mona OS</a>
<li> <a href="http://mongoose.jp/">mongoose</a> (日本語)
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (日本語)
<li> <a href="http://ochusha.sourceforge.jp/">おちゅ〜しゃ</a> (日本語)
<li> <a href="http://sonoisa.github.com/ogrekit/About.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (日本語)
<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (日本語)
<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a>
<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (日本語)
<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (日本語)
<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding
<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (日本語)
<li> <a href="http://www.php.gr.jp/">日本PHPユーザ会</a> PHP 5.0 mb_ereg (日本語)
<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (日本語)
<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (日本語)
<li> <a href="http://search.cpan.org/~andya/re-engine-Oniguruma">re-engine-Oniguruma</a>
<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (日本語)
<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (日本語)
<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (日本語)
<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (日本語)
<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a>
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
<li> <a href="https://code.google.com/p/oniguruma-visualworks/">oniguruma-visualworks</a>
<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (日本語)
<li> <a href="http://www.hi-ho.ne.jp/kuze/tool.htm">Zed (Win32)</a> (日本語)
</ul>
<br>
<dt><b>参考資料:</b>
<ul>
<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Rubyリファレンスマニュアル</a> (日本語)
<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a>
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">正規表現メモ</a> (日本語)
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Perl正規表現雑技</a> (日本語)
</ul>
<br>
</dl>
<p>
and I'm thankful to Akinori MUSHA.
</p>
<hr>
<dl>
<dt><b>他のライブラリ:</b>
<ul>
<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a>
<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a>
<li> <a href="http://www.pcre.org/">PCRE</a>
<li> <a href="http://re2c.org/">re2c</a>
<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
<li> <a href="http://laurikari.net/tre/">TRE</a>
<li> <a href="http://svn.codehaus.org/jruby/joni/">Joni (Java)</a>
<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
<li> <a href="http://sourceforge.jp/projects/onig4j/">Oniguruma for Java</a>
</ul>
</dl>
<hr>
<a href="../">ホームにもどる</a>
</body>
</html>

520
src/Onigmo/install-sh Normal file
View File

@ -0,0 +1,520 @@
#!/bin/sh
# install - install a program, script, or datafile
scriptversion=2009-04-28.21; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
nl='
'
IFS=" "" $nl"
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
# $doit is placed in front of the commands run further down; when it is
# empty they run directly.
doit=${DOITPROG-}
# $doit_exec is the variant used inside subshells: with no DOITPROG it is
# `exec', so the subshell is replaced by the command it runs.
if test -z "$doit"; then
doit_exec=exec
else
doit_exec=$doit
fi
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
posix_glob='?'
initialize_posix_glob='
test "$posix_glob" != "?" || {
if (set -f) 2>/dev/null; then
posix_glob=
else
posix_glob=:
fi
}
'
posix_mkdir=
# Desired mode of installed file.
mode=0755
chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
stripcmd=
src=
dst=
dir_arg=
dst_arg=
copy_on_change=false
no_target_directory=
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...
In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.
Options:
--help display this help and exit.
--version display version info and exit.
-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.
Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"
while test $# -ne 0; do
case $1 in
-c) ;;
-C) copy_on_change=true;;
-d) dir_arg=true;;
-g) chgrpcmd="$chgrpprog $2"
shift;;
--help) echo "$usage"; exit $?;;
-m) mode=$2
case $mode in
*' '* | *' '* | *'
'* | *'*'* | *'?'* | *'['*)
echo "$0: invalid mode: $mode" >&2
exit 1;;
esac
shift;;
-o) chowncmd="$chownprog $2"
shift;;
-s) stripcmd=$stripprog;;
-t) dst_arg=$2
shift;;
-T) no_target_directory=true;;
--version) echo "$0 $scriptversion"; exit $?;;
--) shift
break;;
-*) echo "$0: invalid option: $1" >&2
exit 1;;
*) break;;
esac
shift
done
# Split the destination operand off the end of the argument list. This
# only happens when operands remain and neither -d nor -t was given.
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
# Otherwise, the last argument is the destination. Remove it from $@.
for arg
do
# From the second iteration on, re-append the previously saved candidate
# to $@, so that after the loop only the final argument has been removed.
if test -n "$dst_arg"; then
# $@ is not empty: it contains at least $arg.
set fnord "$@" "$dst_arg"
shift # fnord
fi
shift # arg
dst_arg=$arg
done
fi
if test $# -eq 0; then
if test -z "$dir_arg"; then
echo "$0: no input file specified." >&2
exit 1
fi
# It's OK to call `install-sh -d' without argument.
# This can happen when creating conditional directories.
exit 0
fi
# When installing files (no -d), derive cp_umask, the umask that is in
# effect while the source is copied to its temporary name.
if test -z "$dir_arg"; then
# On signals 1, 2, 13 and 15, exit with the status of the last command.
trap '(exit $?); exit' 1 2 13 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
case $mode in
# Optimize common cases.
*644) cp_umask=133;;
*755) cp_umask=22;;
# Mode ending in an octal digit: subtract its last three digits from 777.
# When stripping, `% 200' is appended to the expr expression.
*[0-7])
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
# Any other mode string is used as the umask operand unchanged, with
# `,u+rw' appended when stripping.
*)
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
fi
for src
do
# Protect names starting with `-'.
case $src in
-*) src=./$src;;
esac
if test -n "$dir_arg"; then
dst=$src
dstdir=$dst
test -d "$dstdir"
dstdir_status=$?
else
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if test ! -f "$src" && test ! -d "$src"; then
echo "$0: $src does not exist." >&2
exit 1
fi
if test -z "$dst_arg"; then
echo "$0: no destination specified." >&2
exit 1
fi
dst=$dst_arg
# Protect names starting with `-'.
case $dst in
-*) dst=./$dst;;
esac
# If destination is a directory, append the input filename; won't work
# if double slashes aren't ignored.
if test -d "$dst"; then
if test -n "$no_target_directory"; then
echo "$0: $dst_arg: Is a directory" >&2
exit 1
fi
dstdir=$dst
dst=$dstdir/`basename "$src"`
dstdir_status=0
else
# Prefer dirname, but fall back on a substitute if dirname fails.
dstdir=`
(dirname "$dst") 2>/dev/null ||
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
X"$dst" : 'X\(//\)[^/]' \| \
X"$dst" : 'X\(//\)$' \| \
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
echo X"$dst" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
s//\1/
q
}
/^X\(\/\/\)[^/].*/{
s//\1/
q
}
/^X\(\/\/\)$/{
s//\1/
q
}
/^X\(\/\).*/{
s//\1/
q
}
s/.*/./; q'
`
test -d "$dstdir"
dstdir_status=$?
fi
fi
obsolete_mkdir_used=false
if test $dstdir_status != 0; then
case $posix_mkdir in
'')
# Create intermediate dirs using mode 755 as modified by the umask.
# This is like FreeBSD 'install' as of 1997-10-28.
umask=`umask`
case $stripcmd.$umask in
# Optimize common cases.
*[2367][2367]) mkdir_umask=$umask;;
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
*[0-7])
mkdir_umask=`expr $umask + 22 \
- $umask % 100 % 40 + $umask % 20 \
- $umask % 10 % 4 + $umask % 2
`;;
*) mkdir_umask=$umask,go-w;;
esac
# With -d, create the new directory with the user-specified mode.
# Otherwise, rely on $mkdir_umask.
if test -n "$dir_arg"; then
mkdir_mode=-m$mode
else
mkdir_mode=
fi
posix_mkdir=false
case $umask in
*[123567][0-7][0-7])
# POSIX mkdir -p sets u+wx bits regardless of umask, which
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
;;
*)
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
if (umask $mkdir_umask &&
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
then
if test -z "$dir_arg" || {
# Check for POSIX incompatibilities with -m.
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
# other-writeable bit of parent directory when it shouldn't.
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
ls_ld_tmpdir=`ls -ld "$tmpdir"`
case $ls_ld_tmpdir in
d????-?r-*) different_mode=700;;
d????-?--*) different_mode=755;;
*) false;;
esac &&
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
}
}
then posix_mkdir=:
fi
rmdir "$tmpdir/d" "$tmpdir"
else
# Remove any dirs left behind by ancient mkdir implementations.
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
fi
trap '' 0;;
esac;;
esac
if
$posix_mkdir && (
umask $mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
)
then :
else
# The umask is ridiculous, or mkdir does not conform to POSIX,
# or it failed possibly due to a race condition. Create the
# directory the slow way, step by step, checking for races as we go.
case $dstdir in
/*) prefix='/';;
-*) prefix='./';;
*) prefix='';;
esac
eval "$initialize_posix_glob"
oIFS=$IFS
IFS=/
$posix_glob set -f
set fnord $dstdir
shift
$posix_glob set +f
IFS=$oIFS
prefixes=
# Walk the components of $dstdir (now in the positional parameters),
# rebuilding the path in $prefix one component at a time.
#  - Components that already exist reset $prefixes, so only the trailing
#    run of missing directories is collected.
#  - With a POSIX-conforming mkdir, retry `mkdir -p' on the whole $dstdir
#    under $mkdir_umask and stop on success.
#  - Otherwise append the shell-quoted prefix to $prefixes; the code after
#    the loop creates those directories in a single mkdir call.
for d
do
  test -z "$d" && continue

  prefix=$prefix$d
  if test -d "$prefix"; then
    prefixes=
  else
    if $posix_mkdir; then
      # Apply the umask inside the subshell. The earlier `umask=$mkdir_umask'
      # only assigned a variable, so mkdir ran with the caller's umask.
      (umask $mkdir_umask &&
       $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
      # Don't fail if two instances are running concurrently.
      test -d "$prefix" || exit 1
    else
      # Escape embedded single quotes so the prefix survives the later eval.
      case $prefix in
        *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
        *) qprefix=$prefix;;
      esac
      prefixes="$prefixes '$qprefix'"
    fi
  fi
  prefix=$prefix/
done
if test -n "$prefixes"; then
# Don't fail if two instances are running concurrently.
(umask $mkdir_umask &&
eval "\$doit_exec \$mkdirprog $prefixes") ||
test -d "$dstdir" || exit 1
obsolete_mkdir_used=true
fi
fi
fi
if test -n "$dir_arg"; then
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
else
# Make a couple of temp file names in the proper directory.
dsttmp=$dstdir/_inst.$$_
rmtmp=$dstdir/_rm.$$_
# Trap to clean up those temp files at exit.
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
# Copy the file name to the temp name.
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
# and set any options; do chmod last to preserve setuid bits.
#
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $cpprog $src $dsttmp" command.
#
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
# If -C, don't bother to copy if it wouldn't change the file.
if $copy_on_change &&
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
eval "$initialize_posix_glob" &&
$posix_glob set -f &&
set X $old && old=:$2:$4:$5:$6 &&
set X $new && new=:$2:$4:$5:$6 &&
$posix_glob set +f &&
test "$old" = "$new" &&
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
then
rm -f "$dsttmp"
else
# Rename the file to the real destination.
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
# The rename failed, perhaps because mv can't rename something else
# to itself, or perhaps because mv is so ancient that it does not
# support -f.
{
# Now remove or move aside any old file at destination location.
# We try this two ways since rm can't unlink itself on some
# systems and the destination file might be busy for other
# reasons. In this case, the final cleanup might fail but the new
# file should still install successfully.
{
test ! -f "$dst" ||
$doit $rmcmd -f "$dst" 2>/dev/null ||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
} ||
{ echo "$0: cannot unlink or rename $dst" >&2
(exit 1); exit 1
}
} &&
# Now rename the file to the real destination.
$doit $mvcmd "$dsttmp" "$dst"
}
fi || exit 1
trap '' 0
fi
done
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

8413
src/Onigmo/ltmain.sh Normal file

File diff suppressed because it is too large Load Diff

7377
src/Onigmo/m4/libtool.m4 vendored Normal file

File diff suppressed because it is too large Load Diff

368
src/Onigmo/m4/ltoptions.m4 vendored Normal file
View File

@ -0,0 +1,368 @@
# Helper functions for option handling. -*- Autoconf -*-
#
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# serial 6 ltoptions.m4
# This is to help aclocal find these macros, as it can't see m4_define.
AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
# ------------------------------------------
# Expand to the name of the flag macro for this pair: `_LT_OPTION_'
# followed by MACRO-NAME__OPTION-NAME, in which every character other
# than a letter, digit or underscore is replaced by an underscore.
m4_define([_LT_MANGLE_OPTION],
[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
# ---------------------------------------
# Set option OPTION-NAME for macro MACRO-NAME, and if there is a
# matching handler defined, dispatch to it. Other OPTION-NAMEs are
# saved as a flag.
m4_define([_LT_SET_OPTION],
[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
_LT_MANGLE_DEFUN([$1], [$2]),
[m4_warning([Unknown $1 option `$2'])])[]dnl
])
# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
# ------------------------------------------------------------
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
m4_define([_LT_IF_OPTION],
[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
# -------------------------------------------------------
# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME
# are set.
m4_define([_LT_UNLESS_OPTIONS],
[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
[m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
[m4_define([$0_found])])])[]dnl
m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
])[]dnl
])
# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
# ----------------------------------------
# OPTION-LIST is a space-separated list of Libtool options associated
# with MACRO-NAME. If any OPTION has a matching handler declared with
# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about
# the unknown option and exit.
m4_defun([_LT_SET_OPTIONS],
[# Set options
m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
[_LT_SET_OPTION([$1], _LT_Option)])
m4_if([$1],[LT_INIT],[
dnl
dnl Simply set some default values (i.e off) if boolean options were not
dnl specified:
_LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
])
_LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
])
dnl
dnl If no reference was made to various pairs of opposing options, then
dnl we run the default mode handler for the pair. For example, if neither
dnl `shared' nor `disable-shared' was passed, we enable building of shared
dnl archives by default:
_LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
_LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
_LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
_LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
[_LT_ENABLE_FAST_INSTALL])
])
])# _LT_SET_OPTIONS
## --------------------------------- ##
## Macros to handle LT_INIT options. ##
## --------------------------------- ##
# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
# -----------------------------------------
# Expand to the name of the handler macro for this pair:
# `_LT_OPTION_DEFUN_' followed by the upper-cased MACRO-NAME__OPTION-NAME,
# in which every character other than an upper-case letter, digit or
# underscore is replaced by an underscore.
m4_define([_LT_MANGLE_DEFUN],
[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
# -----------------------------------------------
# Register CODE as the handler for OPTION-NAME of MACRO-NAME by defining
# the macro whose name _LT_MANGLE_DEFUN produces for that pair.
m4_define([LT_OPTION_DEFINE],
[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
])# LT_OPTION_DEFINE
# dlopen
# ------
LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
])
AU_DEFUN([AC_LIBTOOL_DLOPEN],
[_LT_SET_OPTION([LT_INIT], [dlopen])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the `dlopen' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
# win32-dll
# ---------
# Declare package support for building win32 dll's.
LT_OPTION_DEFINE([LT_INIT], [win32-dll],
[enable_win32_dll=yes
case $host in
*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-cegcc*)
AC_CHECK_TOOL(AS, as, false)
AC_CHECK_TOOL(DLLTOOL, dlltool, false)
AC_CHECK_TOOL(OBJDUMP, objdump, false)
;;
esac
test -z "$AS" && AS=as
_LT_DECL([], [AS], [0], [Assembler program])dnl
test -z "$DLLTOOL" && DLLTOOL=dlltool
_LT_DECL([], [DLLTOOL], [0], [DLL creation program])dnl
test -z "$OBJDUMP" && OBJDUMP=objdump
_LT_DECL([], [OBJDUMP], [0], [Object dumper program])dnl
])# win32-dll
AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
_LT_SET_OPTION([LT_INIT], [win32-dll])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the `win32-dll' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
# _LT_ENABLE_SHARED([DEFAULT])
# ----------------------------
# implement the --enable-shared flag, and supports the `shared' and
# `disable-shared' LT_INIT options.
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
m4_define([_LT_ENABLE_SHARED],
[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([shared],
[AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
[build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
[p=${PACKAGE-default}
case $enableval in
yes) enable_shared=yes ;;
no) enable_shared=no ;;
*)
enable_shared=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
for pkg in $enableval; do
IFS="$lt_save_ifs"
if test "X$pkg" = "X$p"; then
enable_shared=yes
fi
done
IFS="$lt_save_ifs"
;;
esac],
[enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
_LT_DECL([build_libtool_libs], [enable_shared], [0],
[Whether or not to build shared libraries])
])# _LT_ENABLE_SHARED
LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
# Old names:
AC_DEFUN([AC_ENABLE_SHARED],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
])
AC_DEFUN([AC_DISABLE_SHARED],
[_LT_SET_OPTION([LT_INIT], [disable-shared])
])
AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AM_ENABLE_SHARED], [])
dnl AC_DEFUN([AM_DISABLE_SHARED], [])
# _LT_ENABLE_STATIC([DEFAULT])
# ----------------------------
# implement the --enable-static flag, and support the `static' and
# `disable-static' LT_INIT options.
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
m4_define([_LT_ENABLE_STATIC],
[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([static],
[AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
[build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
[p=${PACKAGE-default}
case $enableval in
yes) enable_static=yes ;;
no) enable_static=no ;;
*)
enable_static=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
for pkg in $enableval; do
IFS="$lt_save_ifs"
if test "X$pkg" = "X$p"; then
enable_static=yes
fi
done
IFS="$lt_save_ifs"
;;
esac],
[enable_static=]_LT_ENABLE_STATIC_DEFAULT)
_LT_DECL([build_old_libs], [enable_static], [0],
[Whether or not to build static libraries])
])# _LT_ENABLE_STATIC
LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
# Old names:
AC_DEFUN([AC_ENABLE_STATIC],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
])
AC_DEFUN([AC_DISABLE_STATIC],
[_LT_SET_OPTION([LT_INIT], [disable-static])
])
AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AM_ENABLE_STATIC], [])
dnl AC_DEFUN([AM_DISABLE_STATIC], [])
# _LT_ENABLE_FAST_INSTALL([DEFAULT])
# ----------------------------------
# implement the --enable-fast-install flag, and support the `fast-install'
# and `disable-fast-install' LT_INIT options.
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
m4_define([_LT_ENABLE_FAST_INSTALL],
[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([fast-install],
[AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
[optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
[p=${PACKAGE-default}
case $enableval in
yes) enable_fast_install=yes ;;
no) enable_fast_install=no ;;
*)
enable_fast_install=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
for pkg in $enableval; do
IFS="$lt_save_ifs"
if test "X$pkg" = "X$p"; then
enable_fast_install=yes
fi
done
IFS="$lt_save_ifs"
;;
esac],
[enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
_LT_DECL([fast_install], [enable_fast_install], [0],
[Whether or not to optimize for fast installation])dnl
])# _LT_ENABLE_FAST_INSTALL
LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
# Old names:
AU_DEFUN([AC_ENABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the `fast-install' option into LT_INIT's first parameter.])
])
AU_DEFUN([AC_DISABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the `disable-fast-install' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
# _LT_WITH_PIC([MODE])
# --------------------
# implement the --with-pic flag, and support the `pic-only' and `no-pic'
# LT_INIT options.
# MODE is either `yes' or `no'. If omitted, it defaults to `both'.
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
[AS_HELP_STRING([--with-pic],
[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
[pic_mode="$withval"],
[pic_mode=default])
test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
])# _LT_WITH_PIC
LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
# Old name:
AU_DEFUN([AC_LIBTOOL_PICMODE],
[_LT_SET_OPTION([LT_INIT], [pic-only])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the `pic-only' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
## ----------------- ##
## LTDL_INIT Options ##
## ----------------- ##
m4_define([_LTDL_MODE], [])
LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
[m4_define([_LTDL_MODE], [nonrecursive])])
LT_OPTION_DEFINE([LTDL_INIT], [recursive],
[m4_define([_LTDL_MODE], [recursive])])
LT_OPTION_DEFINE([LTDL_INIT], [subproject],
[m4_define([_LTDL_MODE], [subproject])])
m4_define([_LTDL_TYPE], [])
LT_OPTION_DEFINE([LTDL_INIT], [installable],
[m4_define([_LTDL_TYPE], [installable])])
LT_OPTION_DEFINE([LTDL_INIT], [convenience],
[m4_define([_LTDL_TYPE], [convenience])])

123
src/Onigmo/m4/ltsugar.m4 vendored Normal file
View File

@ -0,0 +1,123 @@
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# serial 6 ltsugar.m4
# This is to help aclocal find these macros, as it can't see m4_define.
AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
# lt_join(SEP, ARG1, [ARG2...])
# -----------------------------
# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
# associated separator.
# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
# versions in m4sugar had bugs.
m4_define([lt_join],
[m4_if([$#], [1], [],
[$#], [2], [[$2]],
[m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
m4_define([_lt_join],
[m4_if([$#$2], [2], [],
[m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
# lt_car(LIST)
# lt_cdr(LIST)
# ------------
# Manipulate m4 lists.
# These macros are necessary as long as we still need to support
# Autoconf-2.59 which quotes differently.
m4_define([lt_car], [[$1]])
m4_define([lt_cdr],
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
[$#], 1, [],
[m4_dquote(m4_shift($@))])])
m4_define([lt_unquote], $1)
# lt_append(MACRO-NAME, STRING, [SEPARATOR])
# ------------------------------------------
# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
# Note that neither SEPARATOR nor STRING are expanded; they are appended
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
# than defined and empty).
#
# This macro is needed until we can rely on Autoconf 2.62, since earlier
# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
m4_define([lt_append],
[m4_define([$1],
m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
# ----------------------------------------------------------
# Produce a SEP delimited list of all paired combinations of elements of
# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list
# has the form PREFIXmINFIXSUFFIXn.
# Needed until we can rely on m4_combine added in Autoconf 2.62.
m4_define([lt_combine],
[m4_if(m4_eval([$# > 3]), [1],
[m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
[[m4_foreach([_Lt_prefix], [$2],
[m4_foreach([_Lt_suffix],
]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
# -----------------------------------------------------------------------
# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
m4_define([lt_if_append_uniq],
[m4_ifdef([$1],
[m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
[lt_append([$1], [$2], [$3])$4],
[$5])],
[lt_append([$1], [$2], [$3])$4])])
# lt_dict_add(DICT, KEY, VALUE)
# -----------------------------
# Store VALUE under KEY by defining a macro literally named `DICT(KEY)'.
m4_define([lt_dict_add],
[m4_define([$1($2)], [$3])])
# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
# --------------------------------------------
# Store VALUE under the KEY/SUBKEY pair by defining a macro literally
# named `DICT(KEY:SUBKEY)'.
m4_define([lt_dict_add_subkey],
[m4_define([$1($2:$3)], [$4])])
# lt_dict_fetch(DICT, KEY, [SUBKEY])
# ----------------------------------
# Expand to the value stored under `DICT(KEY:SUBKEY)' when SUBKEY is
# non-empty, otherwise to the value stored under `DICT(KEY)'. Expands to
# nothing when the selected entry is not defined.
m4_define([lt_dict_fetch],
[m4_ifval([$3],
m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
# -----------------------------------------------------------------
# Look the entry up with lt_dict_fetch and compare the result with VALUE:
# expand IF-TRUE when they are equal, IF-FALSE otherwise.
m4_define([lt_if_dict_fetch],
[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
[$5],
[$6])])
# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
# --------------------------------------------------------------
m4_define([lt_dict_filter],
[m4_if([$5], [], [],
[lt_join(m4_quote(m4_default([$4], [[, ]])),
lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
[lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
])

23
src/Onigmo/m4/ltversion.m4 vendored Normal file
View File

@ -0,0 +1,23 @@
# ltversion.m4 -- version numbers -*- Autoconf -*-
#
# Copyright (C) 2004 Free Software Foundation, Inc.
# Written by Scott James Remnant, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# Generated from ltversion.in.
# serial 3017 ltversion.m4
# This file is part of GNU Libtool
m4_define([LT_PACKAGE_VERSION], [2.2.6b])
m4_define([LT_PACKAGE_REVISION], [1.3017])
# LTVERSION_VERSION
# -----------------
# Set the shell variables macro_version and macro_revision to the release
# and revision recorded in this file, and declare both through _LT_DECL.
AC_DEFUN([LTVERSION_VERSION],
[macro_version='2.2.6b'
macro_revision='1.3017'
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
_LT_DECL(, macro_revision, 0)
])

92
src/Onigmo/m4/lt~obsolete.m4 vendored Normal file
View File

@ -0,0 +1,92 @@
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
# Written by Scott James Remnant, 2004.
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# serial 4 lt~obsolete.m4
# These exist entirely to fool aclocal when bootstrapping libtool.
#
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
# which have later been changed to m4_define as they aren't part of the
# exported API, or moved to Autoconf or Automake where they belong.
#
# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN
# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
# using a macro with the same name in our local m4/libtool.m4 it'll
# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
# and doesn't know about Autoconf macros at all.)
#
# So we provide this file, which has a silly filename so it's always
# included after everything else. This provides aclocal with the
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
# because those macros already exist, or will be overwritten later.
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
#
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
# Yes, that means every name once taken will need to remain here until
# we give up compatibility with versions before 1.7, at which point
# we need to keep only those names which we still refer to.
# This is to help aclocal find these macros, as it can't see m4_define.
AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])])
m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])])
m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])])
m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])])
m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])])
m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])])
m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])])
m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])])
m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])])
m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])])
m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])])
m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])])
m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])])
m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])])
m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])])
m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])])
m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])])
m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])])
m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])])
m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])])
m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])])
m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])])
m4_ifndef([AC_LIBTOOL_RC], [AC_DEFUN([AC_LIBTOOL_RC])])
m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])])
m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])])

376
src/Onigmo/missing Normal file
View File

@ -0,0 +1,376 @@
#! /bin/sh
# Common stub for a few missing GNU programs while installing.
scriptversion=2009-04-28.21; # UTC
# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006,
# 2008, 2009 Free Software Foundation, Inc.
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
if test $# -eq 0; then
echo 1>&2 "Try \`$0 --help' for more information"
exit 1
fi
run=:
sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
# In the cases where this matters, `missing' is being run in the
# srcdir already.
if test -f configure.ac; then
configure_ac=configure.ac
else
configure_ac=configure.in
fi
msg="missing on your system"
case $1 in
--run)
# Try to run requested program, and just exit if it succeeds.
run=
shift
"$@" && exit 0
# Exit code 63 means version mismatch. This often happens
# when the user tries to use an ancient version of a tool on
# a file that requires a minimum version. In this case
# we should proceed as if the program had been absent, or
# as if --run hadn't been passed.
if test $? = 63; then
run=:
msg="probably too old"
fi
;;
-h|--h|--he|--hel|--help)
echo "\
$0 [OPTION]... PROGRAM [ARGUMENT]...
Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
error status if there is no known handling for PROGRAM.
Options:
-h, --help display this help and exit
-v, --version output version information and exit
--run try to run the given command, and emulate it if it fails
Supported PROGRAM values:
aclocal touch file \`aclocal.m4'
autoconf touch file \`configure'
autoheader touch file \`config.h.in'
autom4te touch the output file, or create a stub one
automake touch all \`Makefile.in' files
bison create \`y.tab.[ch]', if possible, from existing .[ch]
flex create \`lex.yy.c', if possible, from existing .c
help2man touch the output file
lex create \`lex.yy.c', if possible, from existing .c
makeinfo touch the output file
tar try tar, gnutar, gtar, then tar without non-portable flags
yacc create \`y.tab.[ch]', if possible, from existing .[ch]
Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and
\`g' are ignored when checking the name.
Send bug reports to <bug-automake@gnu.org>."
exit $?
;;
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
echo "missing $scriptversion (GNU Automake)"
exit $?
;;
-*)
echo 1>&2 "$0: Unknown \`$1' option"
echo 1>&2 "Try \`$0 --help' for more information"
exit 1
;;
esac
# normalize program name to check for.
program=`echo "$1" | sed '
s/^gnu-//; t
s/^gnu//; t
s/^g//; t'`
# Now exit if we have it, but it failed. Also exit now if we
# don't have it and --version was passed (most likely to detect
# the program). This is about non-GNU programs, so use $1 not
# $program.
case $1 in
lex*|yacc*)
# Not GNU programs, they don't have --version.
;;
tar*)
if test -n "$run"; then
echo 1>&2 "ERROR: \`tar' requires --run"
exit 1
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
exit 1
fi
;;
*)
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
# We have it, but it failed.
exit 1
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
# Could not run --version or --help. This is probably someone
# running `$TOOL --version' or `$TOOL --help' to check whether
# $TOOL exists and not knowing $TOOL uses missing.
exit 1
fi
;;
esac
# If it does not exist, or fails to run (possibly an outdated version),
# try to emulate it.
case $program in
aclocal*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`acinclude.m4' or \`${configure_ac}'. You might want
to install the \`Automake' and \`Perl' packages. Grab them from
any GNU archive site."
touch aclocal.m4
;;
autoconf*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`${configure_ac}'. You might want to install the
\`Autoconf' and \`GNU m4' packages. Grab them from any GNU
archive site."
touch configure
;;
autoheader*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`acconfig.h' or \`${configure_ac}'. You might want
to install the \`Autoconf' and \`GNU m4' packages. Grab them
from any GNU archive site."
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
test -z "$files" && files="config.h"
touch_files=
for f in $files; do
case $f in
*:*) touch_files="$touch_files "`echo "$f" |
sed -e 's/^[^:]*://' -e 's/:.*//'`;;
*) touch_files="$touch_files $f.in";;
esac
done
touch $touch_files
;;
automake*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
You might want to install the \`Automake' and \`Perl' packages.
Grab them from any GNU archive site."
find . -type f -name Makefile.am -print |
sed 's/\.am$/.in/' |
while read f; do touch "$f"; done
;;
autom4te*)
echo 1>&2 "\
WARNING: \`$1' is needed, but is $msg.
You might have modified some files without having the
proper tools for further handling them.
You can get \`$1' as part of \`Autoconf' from any GNU
archive site."
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -f "$file"; then
touch $file
else
test -z "$file" || exec >$file
echo "#! /bin/sh"
echo "# Created by GNU Automake missing as a replacement of"
echo "# $ $@"
echo "exit 0"
chmod +x $file
exit 1
fi
;;
bison*|yacc*)
echo 1>&2 "\
WARNING: \`$1' $msg. You should only need it if
you modified a \`.y' file. You may need the \`Bison' package
in order for those modifications to take effect. You can get
\`Bison' from any GNU archive site."
rm -f y.tab.c y.tab.h
if test $# -ne 1; then
eval LASTARG="\${$#}"
case $LASTARG in
*.y)
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" y.tab.c
fi
SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" y.tab.h
fi
;;
esac
fi
if test ! -f y.tab.h; then
echo >y.tab.h
fi
if test ! -f y.tab.c; then
echo 'main() { return 0; }' >y.tab.c
fi
;;
lex*|flex*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a \`.l' file. You may need the \`Flex' package
in order for those modifications to take effect. You can get
\`Flex' from any GNU archive site."
rm -f lex.yy.c
if test $# -ne 1; then
eval LASTARG="\${$#}"
case $LASTARG in
*.l)
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" lex.yy.c
fi
;;
esac
fi
if test ! -f lex.yy.c; then
echo 'main() { return 0; }' >lex.yy.c
fi
;;
help2man*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a dependency of a manual page. You may need the
\`Help2man' package in order for those modifications to take
effect. You can get \`Help2man' from any GNU archive site."
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -f "$file"; then
touch $file
else
test -z "$file" || exec >$file
echo ".ab help2man is required to generate this page"
exit $?
fi
;;
makeinfo*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a \`.texi' or \`.texinfo' file, or any other file
indirectly affecting the aspect of the manual. The spurious
call might also be the consequence of using a buggy \`make' (AIX,
DU, IRIX). You might want to install the \`Texinfo' package or
the \`GNU make' package. Grab either from any GNU archive site."
# The file to touch is that specified with -o ...
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -z "$file"; then
# ... or it is the one specified with @setfilename ...
infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
file=`sed -n '
/^@setfilename/{
s/.* \([^ ]*\) *$/\1/
p
q
}' $infile`
# ... or it is derived from the source name (dir/f.texi becomes f.info)
test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
fi
# If the file does not exist, the user really needs makeinfo;
# let's fail without touching anything.
test -f $file || exit 1
touch $file
;;
tar*)
shift
# We have already tried tar in the generic part.
# Look for gnutar/gtar before invocation to avoid ugly error
# messages.
if (gnutar --version > /dev/null 2>&1); then
gnutar "$@" && exit 0
fi
if (gtar --version > /dev/null 2>&1); then
gtar "$@" && exit 0
fi
firstarg="$1"
if shift; then
case $firstarg in
*o*)
firstarg=`echo "$firstarg" | sed s/o//`
tar "$firstarg" "$@" && exit 0
;;
esac
case $firstarg in
*h*)
firstarg=`echo "$firstarg" | sed s/h//`
tar "$firstarg" "$@" && exit 0
;;
esac
fi
echo 1>&2 "\
WARNING: I can't seem to be able to run \`tar' with the given arguments.
You may want to install GNU tar or Free paxutils, or check the
command line arguments."
exit 1
;;
*)
echo 1>&2 "\
WARNING: \`$1' is needed, and is $msg.
You might have modified some files without having the
proper tools for further handling them. Check the \`README' file,
it often tells you about the needed prerequisites for installing
this package. You may also peek at any GNU archive site, in case
some other package would contain this missing \`$1' program."
exit 1
;;
esac
exit 0
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

78
src/Onigmo/onig-config.in Normal file
View File

@ -0,0 +1,78 @@
#!/bin/sh
# Copyright (C) 2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp)
ONIG_VERSION=@PACKAGE_VERSION@
# show_usage: write the option summary below to standard output, then
# terminate the whole script with exit status 1.
show_usage()
{
cat <<EOF
Usage: onig-config [OPTION]
Values for OPTION are:
--prefix[=DIR] change prefix to DIR
--prefix print prefix
--exec-prefix[=DIR] change exec_prefix to DIR
--exec-prefix print exec_prefix
--cflags print C compiler flags
--libs print library information
--version print oniguruma version
--help print this help
EOF
exit 1
}
# With no arguments there is nothing to report: print usage and exit.
if test $# -eq 0; then
  show_usage
fi

# Defaults substituted by configure; --prefix=DIR and --exec-prefix=DIR
# on the command line replace them.
prefix=@prefix@
exec_prefix=@exec_prefix@
is_set_exec_prefix=no

# Process the options strictly left to right, one per iteration.
while test $# -gt 0; do
  # Split off the value of an "--option=value" argument (empty otherwise).
  case "$1" in
    -*=*) val=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'`
      ;;
    *) val=
      ;;
  esac

  # All expansions below are double-quoted so that directories containing
  # blanks or glob characters are printed verbatim instead of being
  # word-split or pathname-expanded.
  case $1 in
    --prefix=*)
      prefix=$val
      # exec_prefix follows prefix until it has been set explicitly.
      if test "$is_set_exec_prefix" = no ; then
        exec_prefix=$val
      fi
      ;;
    --prefix)
      echo "$prefix"
      ;;
    --exec-prefix=*)
      exec_prefix=$val
      is_set_exec_prefix=yes
      ;;
    --exec-prefix)
      echo "$exec_prefix"
      ;;
    --cflags)
      # Start from an empty value so nothing inherited from the
      # environment leaks into the output; /usr/include needs no -I flag.
      show_includedir=
      if test "@includedir@" != /usr/include ; then
        show_includedir="-I@includedir@"
      fi
      echo "$show_includedir"
      ;;
    --libs)
      echo "-L@libdir@ -lonig"
      ;;
    --version)
      echo "$ONIG_VERSION"
      ;;
    *)
      # Anything else (including --help) prints usage and exits 1.
      show_usage
      ;;
  esac
  shift
done
# END

462
src/Onigmo/onig.py Normal file
View File

@ -0,0 +1,462 @@
# -*- coding: utf-8 -*-
"""Using Onigmo (Oniguruma-mod) regular expression library.
This is a low level wrapper for Onigmo regular expression DLL/shared object.
(This module does not support static link library.)
This provides almost same API as the original C API, so the API is not
object oriented.
Onigmo DLL (onig.dll, libonig.so, etc.) must be placed in the
default search path. The default search path depends on the system.
"""
import ctypes
import os
import sys
#__all__ = ["onig_new", "onig_free",
# "onig_search", "onig_match",
# "onig_region_new", "onig_region_free",
# "onig_version", "onig_copyright"]
#
# Type Definitions
#
OnigCodePoint = ctypes.c_uint
class OnigRegexType(ctypes.Structure):
_fields_ = [
]
regex_t = OnigRegexType
OnigRegex = ctypes.POINTER(OnigRegexType)
# Signed integer type that is as wide as a pointer.
try:
    # Provided by ctypes from Python 2.7 onwards.
    _c_ssize_t = ctypes.c_ssize_t
except AttributeError:
    # Python 2.6 lacks ctypes.c_ssize_t: take the first signed integer
    # type whose size equals the pointer size.
    _pointer_size = ctypes.sizeof(ctypes.c_void_p)
    for _candidate in (ctypes.c_int, ctypes.c_long, ctypes.c_longlong):
        if ctypes.sizeof(_candidate) == _pointer_size:
            _c_ssize_t = _candidate
            break
    del _pointer_size, _candidate


class OnigRegion(ctypes.Structure):
    """Region record whose pointer is passed to onig_search / onig_match.

    The member order defines the binary layout and must stay as is.
    """
    _fields_ = [
        ("allocated", ctypes.c_int),
        ("num_regs", ctypes.c_int),
        ("beg", ctypes.POINTER(_c_ssize_t)),
        ("end", ctypes.POINTER(_c_ssize_t)),
        ("history_root", ctypes.c_void_p),
    ]


# Second name for the same structure type.
re_registers = OnigRegion
OnigOptionType = ctypes.c_int
# ctypes declaration of the encoding descriptor objects exported by the
# library. The member order fixes the binary layout, so it must not be
# rearranged. Apart from name/max_enc_len/min_enc_len every member is
# declared as an opaque pointer (c_void_p) -- presumably C callbacks, TODO
# confirm -- so none of them can be called through this wrapper.
class OnigEncodingType(ctypes.Structure):
_fields_ = [
("mbc_enc_len", ctypes.c_void_p),
("name", ctypes.c_char_p),
("max_enc_len", ctypes.c_int),
("min_enc_len", ctypes.c_int),
("is_mbc_newline", ctypes.c_void_p),
("mbc_to_code", ctypes.c_void_p),
("code_to_mbclen", ctypes.c_void_p),
("code_to_mbc", ctypes.c_void_p),
("mbc_case_fold", ctypes.c_void_p),
("apply_all_case_fold", ctypes.c_void_p),
("get_case_fold_codes_by_str", ctypes.c_void_p),
("property_name_to_ctype", ctypes.c_void_p),
("is_code_ctype", ctypes.c_void_p),
("get_ctype_code_range", ctypes.c_void_p),
("left_adjust_char_head", ctypes.c_void_p),
("is_allowed_reverse_match",ctypes.c_void_p),
]
# Pointer-to-descriptor type; used as the encoding argument type of onig_new.
OnigEncoding = ctypes.POINTER(OnigEncodingType)
class OnigMetaCharTableType(ctypes.Structure):
    """Meta-character table: one code point per configurable role.

    The member order defines the binary layout and must stay as is; it
    lines up with the ONIG_META_CHAR_* indices (0..5) defined in this
    module, where index 4 is ONIG_META_CHAR_ONE_OR_MORE_TIME.
    """
    _fields_ = [
        ("esc", OnigCodePoint),
        ("anychar", OnigCodePoint),
        ("anytime", OnigCodePoint),
        ("zero_or_one_time", OnigCodePoint),
        # Was misspelled "one_or_one_time"; the old name remains usable
        # through the alias property below.
        ("one_or_more_time", OnigCodePoint),
        ("anychar_anytime", OnigCodePoint),
    ]

    def _get_one_or_one_time(self):
        return self.one_or_more_time

    def _set_one_or_one_time(self, value):
        self.one_or_more_time = value

    # Backward-compatible alias for the historical (misspelled) field name;
    # reads and writes go to the same storage as one_or_more_time.
    one_or_one_time = property(_get_one_or_one_time, _set_one_or_one_time)
# ctypes declaration of a syntax definition: three unsigned bit masks
# (op, op2, behavior), an option word and an embedded meta-character table.
# The member order fixes the binary layout.
# Presumably op/op2/behavior carry the ONIG_SYN_OP_* / ONIG_SYN_OP2_* /
# ONIG_SYN_* flags defined in this module -- TODO confirm.
class OnigSyntaxType(ctypes.Structure):
_fields_ = [
("op", ctypes.c_uint),
("op2", ctypes.c_uint),
("behavior", ctypes.c_uint),
("options", OnigOptionType),
("meta_char_table", OnigMetaCharTableType),
]
# ctypes declaration of the error-detail record; pointers to it are accepted
# by the onig_new and onig_error_code_to_str bindings in this module.
# par/par_end presumably delimit the offending part of the pattern -- TODO
# confirm against the C header.
class OnigErrorInfo(ctypes.Structure):
_fields_ = [
("enc", OnigEncoding),
("par", ctypes.c_char_p),
("par_end", ctypes.c_char_p),
]
# Load the Onigmo DLL / shared object under its platform-specific file name.
# The name is handed to the loader as is, so the library has to be on the
# system's default search path.
if os.name in ("nt", "ce"):
    _libname = "onig.dll"
else:
    _libname = "libonig.dll" if sys.platform == "cygwin" else "libonig.so"
libonig = ctypes.cdll.LoadLibrary(_libname)
#
# Encodings
#
def _load_encoding(enc):
    """Return a ctypes pointer to the OnigEncodingType object that libonig
    exports under the symbol name `enc`.

    The lookup is done with `in_dll`; callers in this module catch
    ValueError for symbols that may be absent.
    """
    descriptor = OnigEncodingType.in_dll(libonig, enc)
    return ctypes.pointer(descriptor)
ONIG_ENCODING_ASCII = _load_encoding("OnigEncodingASCII")
ONIG_ENCODING_ISO_8859_1 = _load_encoding("OnigEncodingISO_8859_1")
ONIG_ENCODING_ISO_8859_2 = _load_encoding("OnigEncodingISO_8859_2")
ONIG_ENCODING_ISO_8859_3 = _load_encoding("OnigEncodingISO_8859_3")
ONIG_ENCODING_ISO_8859_4 = _load_encoding("OnigEncodingISO_8859_4")
ONIG_ENCODING_ISO_8859_5 = _load_encoding("OnigEncodingISO_8859_5")
ONIG_ENCODING_ISO_8859_6 = _load_encoding("OnigEncodingISO_8859_6")
ONIG_ENCODING_ISO_8859_7 = _load_encoding("OnigEncodingISO_8859_7")
ONIG_ENCODING_ISO_8859_8 = _load_encoding("OnigEncodingISO_8859_8")
ONIG_ENCODING_ISO_8859_9 = _load_encoding("OnigEncodingISO_8859_9")
ONIG_ENCODING_ISO_8859_10 = _load_encoding("OnigEncodingISO_8859_10")
ONIG_ENCODING_ISO_8859_11 = _load_encoding("OnigEncodingISO_8859_11")
ONIG_ENCODING_ISO_8859_13 = _load_encoding("OnigEncodingISO_8859_13")
ONIG_ENCODING_ISO_8859_14 = _load_encoding("OnigEncodingISO_8859_14")
ONIG_ENCODING_ISO_8859_15 = _load_encoding("OnigEncodingISO_8859_15")
ONIG_ENCODING_ISO_8859_16 = _load_encoding("OnigEncodingISO_8859_16")
ONIG_ENCODING_UTF8 = _load_encoding("OnigEncodingUTF8")
ONIG_ENCODING_UTF16_LE = _load_encoding("OnigEncodingUTF16_LE")
ONIG_ENCODING_UTF16_BE = _load_encoding("OnigEncodingUTF16_BE")
ONIG_ENCODING_UTF32_LE = _load_encoding("OnigEncodingUTF32_LE")
ONIG_ENCODING_UTF32_BE = _load_encoding("OnigEncodingUTF32_BE")
ONIG_ENCODING_EUC_JP = _load_encoding("OnigEncodingEUC_JP")
ONIG_ENCODING_EUC_TW = _load_encoding("OnigEncodingEUC_TW")
ONIG_ENCODING_EUC_KR = _load_encoding("OnigEncodingEUC_KR")
ONIG_ENCODING_EUC_CN = _load_encoding("OnigEncodingEUC_CN")
ONIG_ENCODING_SJIS = _load_encoding("OnigEncodingSJIS")
try:
ONIG_ENCODING_CP932 = _load_encoding("OnigEncodingCP932")
except ValueError:
pass
#ONIG_ENCODING_KOI8 = _load_encoding("OnigEncodingKOI8")
ONIG_ENCODING_KOI8_R = _load_encoding("OnigEncodingKOI8_R")
ONIG_ENCODING_CP1251 = _load_encoding("OnigEncodingCP1251")
ONIG_ENCODING_BIG5 = _load_encoding("OnigEncodingBIG5")
ONIG_ENCODING_GB18030 = _load_encoding("OnigEncodingGB18030")
#ONIG_ENCODING_UNDEF = None
#
# Syntaxes
#
def _load_syntax(syn):
    """Return a ctypes pointer to the OnigSyntaxType object that libonig
    exports under the symbol name `syn`.

    The lookup is done with `in_dll`; callers in this module catch
    ValueError for symbols that may be absent.
    """
    syntax = OnigSyntaxType.in_dll(libonig, syn)
    return ctypes.pointer(syntax)
ONIG_SYNTAX_ASIS = _load_syntax("OnigSyntaxASIS")
ONIG_SYNTAX_POSIX_BASIC = _load_syntax("OnigSyntaxPosixBasic")
ONIG_SYNTAX_POSIX_EXTENDED = _load_syntax("OnigSyntaxPosixExtended")
ONIG_SYNTAX_EMACS = _load_syntax("OnigSyntaxEmacs")
ONIG_SYNTAX_GREP = _load_syntax("OnigSyntaxGrep")
ONIG_SYNTAX_GNU_REGEX = _load_syntax("OnigSyntaxGnuRegex")
ONIG_SYNTAX_JAVA = _load_syntax("OnigSyntaxJava")
ONIG_SYNTAX_PERL = _load_syntax("OnigSyntaxPerl")
try:
ONIG_SYNTAX_PERL58 = _load_syntax("OnigSyntaxPerl58")
ONIG_SYNTAX_PERL58_NG = _load_syntax("OnigSyntaxPerl58_NG")
except ValueError:
pass
try:
ONIG_SYNTAX_PERL_NG = _load_syntax("OnigSyntaxPerl_NG")
except ValueError:
pass
ONIG_SYNTAX_RUBY = _load_syntax("OnigSyntaxRuby")
try:
ONIG_SYNTAX_PYTHON = _load_syntax("OnigSyntaxPython")
except ValueError:
pass
ONIG_SYNTAX_DEFAULT = ctypes.POINTER(OnigSyntaxType).in_dll(
libonig, "OnigDefaultSyntax")
#
# Constants
#
ONIG_MAX_ERROR_MESSAGE_LEN = 90
# options
ONIG_OPTION_NONE = 0
ONIG_OPTION_IGNORECASE = 1
ONIG_OPTION_EXTEND = (ONIG_OPTION_IGNORECASE << 1)
ONIG_OPTION_MULTILINE = (ONIG_OPTION_EXTEND << 1)
ONIG_OPTION_DOTALL = ONIG_OPTION_MULTILINE
ONIG_OPTION_SINGLELINE = (ONIG_OPTION_MULTILINE << 1)
ONIG_OPTION_FIND_LONGEST = (ONIG_OPTION_SINGLELINE << 1)
ONIG_OPTION_FIND_NOT_EMPTY = (ONIG_OPTION_FIND_LONGEST << 1)
ONIG_OPTION_NEGATE_SINGLELINE = (ONIG_OPTION_FIND_NOT_EMPTY << 1)
ONIG_OPTION_DONT_CAPTURE_GROUP = (ONIG_OPTION_NEGATE_SINGLELINE << 1)
ONIG_OPTION_CAPTURE_GROUP = (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
# options (search time)
ONIG_OPTION_NOTBOL = (ONIG_OPTION_CAPTURE_GROUP << 1)
ONIG_OPTION_NOTEOL = (ONIG_OPTION_NOTBOL << 1)
ONIG_OPTION_POSIX_REGION = (ONIG_OPTION_NOTEOL << 1)
# options (ctype range)
ONIG_OPTION_ASCII_RANGE = (ONIG_OPTION_POSIX_REGION << 1)
ONIG_OPTION_POSIX_BRACKET_ALL_RANGE = (ONIG_OPTION_ASCII_RANGE << 1)
ONIG_OPTION_WORD_BOUND_ALL_RANGE = (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
# options (newline)
ONIG_OPTION_NEWLINE_CRLF = (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
ONIG_OPTION_DEFAULT = ONIG_OPTION_NONE
# syntax (operators)
ONIG_SYN_OP_VARIABLE_META_CHARACTERS = (1<<0)
ONIG_SYN_OP_DOT_ANYCHAR = (1<<1)
ONIG_SYN_OP_ASTERISK_ZERO_INF = (1<<2)
ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF = (1<<3)
ONIG_SYN_OP_PLUS_ONE_INF = (1<<4)
ONIG_SYN_OP_ESC_PLUS_ONE_INF = (1<<5)
ONIG_SYN_OP_QMARK_ZERO_ONE = (1<<6)
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE = (1<<7)
ONIG_SYN_OP_BRACE_INTERVAL = (1<<8)
ONIG_SYN_OP_ESC_BRACE_INTERVAL = (1<<9)
ONIG_SYN_OP_VBAR_ALT = (1<<10)
ONIG_SYN_OP_ESC_VBAR_ALT = (1<<11)
ONIG_SYN_OP_LPAREN_SUBEXP = (1<<12)
ONIG_SYN_OP_ESC_LPAREN_SUBEXP = (1<<13)
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR = (1<<14)
ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR = (1<<15)
ONIG_SYN_OP_DECIMAL_BACKREF = (1<<16)
ONIG_SYN_OP_BRACKET_CC = (1<<17)
ONIG_SYN_OP_ESC_W_WORD = (1<<18)
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END = (1<<19)
ONIG_SYN_OP_ESC_B_WORD_BOUND = (1<<20)
ONIG_SYN_OP_ESC_S_WHITE_SPACE = (1<<21)
ONIG_SYN_OP_ESC_D_DIGIT = (1<<22)
ONIG_SYN_OP_LINE_ANCHOR = (1<<23)
ONIG_SYN_OP_POSIX_BRACKET = (1<<24)
ONIG_SYN_OP_QMARK_NON_GREEDY = (1<<25)
ONIG_SYN_OP_ESC_CONTROL_CHARS = (1<<26)
ONIG_SYN_OP_ESC_C_CONTROL = (1<<27)
ONIG_SYN_OP_ESC_OCTAL3 = (1<<28)
ONIG_SYN_OP_ESC_X_HEX2 = (1<<29)
ONIG_SYN_OP_ESC_X_BRACE_HEX8 = (1<<30)
ONIG_SYN_OP_ESC_O_BRACE_OCTAL = (1<<31)
ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE = (1<<0)
ONIG_SYN_OP2_QMARK_GROUP_EFFECT = (1<<1)
ONIG_SYN_OP2_OPTION_PERL = (1<<2)
ONIG_SYN_OP2_OPTION_RUBY = (1<<3)
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT = (1<<4)
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL = (1<<5)
ONIG_SYN_OP2_CCLASS_SET_OP = (1<<6)
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP = (1<<7)
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF = (1<<8)
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL = (1<<9)
ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY = (1<<10)
ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL = (1<<11)
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META = (1<<12)
ONIG_SYN_OP2_ESC_V_VTAB = (1<<13)
ONIG_SYN_OP2_ESC_U_HEX4 = (1<<14)
ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR = (1<<15)
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY = (1<<16)
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT = (1<<17)
#ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS = (1<<18)
ONIG_SYN_OP2_ESC_H_XDIGIT = (1<<19)
ONIG_SYN_OP2_INEFFECTIVE_ESCAPE = (1<<20)
ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK = (1<<21)
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER = (1<<22)
ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE = (1<<23)
ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE = (1<<24)
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP = (1<<25)
ONIG_SYN_OP2_ESC_G_BRACE_BACKREF = (1<<26)
ONIG_SYN_OP2_QMARK_SUBEXP_CALL = (1<<27)
ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET = (1<<28)
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION = (1<<29)
ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP = (1<<30)
ONIG_SYN_OP2_OPTION_JAVA = (1<<31)
# syntax (behavior)
ONIG_SYN_CONTEXT_INDEP_ANCHORS = (1<<31)
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS = (1<<0)
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS = (1<<1)
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP = (1<<2)
ONIG_SYN_ALLOW_INVALID_INTERVAL = (1<<3)
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV = (1<<4)
ONIG_SYN_STRICT_CHECK_BACKREF = (1<<5)
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND = (1<<6)
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP = (1<<7)
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME = (1<<8)
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY = (1<<9)
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL = (1<<10)
# (behavior) in char class [...]
ONIG_SYN_POSIX_BRACKET_ALWAYS_ALL_RANGE = (1<<19)
ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC = (1<<20)
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC = (1<<21)
ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC = (1<<22)
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC = (1<<23)
# syntax (behavior) warning
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED = (1<<24)
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT = (1<<25)
# meta character specifiers (onig_set_meta_char())
ONIG_META_CHAR_ESCAPE = 0
ONIG_META_CHAR_ANYCHAR = 1
ONIG_META_CHAR_ANYTIME = 2
ONIG_META_CHAR_ZERO_OR_ONE_TIME = 3
ONIG_META_CHAR_ONE_OR_MORE_TIME = 4
ONIG_META_CHAR_ANYCHAR_ANYTIME = 5
ONIG_INEFFECTIVE_META_CHAR = 0
# error codes
def ONIG_IS_PATTERN_ERROR(ecode):
    """Tell whether `ecode` lies in the range -999 .. -100 (both inclusive)."""
    at_most_minus_100 = ecode <= -100
    return at_most_minus_100 and ecode > -1000
# normal return
ONIG_NORMAL = 0
ONIG_MISMATCH = -1
ONIG_NO_SUPPORT_CONFIG = -2
# internal error
# general error
ONIGERR_INVALID_ARGUMENT = -30
# syntax error
# values error (syntax error)
# errors related to thread
ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT = -1001
#
# Onigmo APIs
#
# onig_init
onig_init = libonig.onig_init
# onig_error_code_to_str
libonig.onig_error_code_to_str.argtypes = [
    ctypes.c_char_p,
    ctypes.c_int,
    ctypes.POINTER(OnigErrorInfo),
]


def onig_error_code_to_str(err_buf, err_code, err_info=None):
    """Forward the three arguments to the library's onig_error_code_to_str.

    `err_info` is optional; the default None is passed on as a NULL
    pointer. The library call's return value is handed back unchanged.
    """
    result = libonig.onig_error_code_to_str(err_buf, err_code, err_info)
    return result
# onig_set_warn_func
# onig_set_verb_warn_func
# onig_new
libonig.onig_new.argtypes = [ctypes.POINTER(OnigRegex),
ctypes.c_void_p, ctypes.c_void_p,
OnigOptionType, OnigEncoding, ctypes.POINTER(OnigSyntaxType),
ctypes.POINTER(OnigErrorInfo)]
onig_new = libonig.onig_new
# onig_reg_init
# onig_new_without_alloc
# onig_new_deluxe
# onig_free
# The C function is declared void. Without an explicit restype ctypes
# assumes int and would return a meaningless value, so state "no result".
libonig.onig_free.argtypes = [OnigRegex]
libonig.onig_free.restype = None
onig_free = libonig.onig_free
# onig_free_body
# onig_recompile
# onig_recompile_deluxe
# onig_search
libonig.onig_search.argtypes = [OnigRegex,
ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
ctypes.POINTER(OnigRegion), OnigOptionType]
libonig.onig_search.restype = _c_ssize_t
onig_search = libonig.onig_search
# onig_search_gpos
# onig_match
libonig.onig_match.argtypes = [OnigRegex,
ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
ctypes.POINTER(OnigRegion), OnigOptionType]
libonig.onig_match.restype = _c_ssize_t
onig_match = libonig.onig_match
# onig_region_new
libonig.onig_region_new.argtypes = []
libonig.onig_region_new.restype = ctypes.POINTER(OnigRegion)
onig_region_new = libonig.onig_region_new
# onig_region_init
# onig_region_free
# Takes the region pointer and an int flag. The C function is declared
# void, so restype is set to None instead of the ctypes default (int),
# which would yield a meaningless return value.
libonig.onig_region_free.argtypes = [ctypes.POINTER(OnigRegion), ctypes.c_int]
libonig.onig_region_free.restype = None
onig_region_free = libonig.onig_region_free
# onig_region_copy
# onig_region_clear
# onig_region_resize
# onig_region_set
# onig_name_to_group_numbers
# onig_name_to_backref_number
# onig_foreach_name
# onig_number_of_names
# onig_number_of_captures
# onig_number_of_capture_histories
# onig_get_capture_tree
# onig_capture_tree_traverse
# onig_noname_group_capture_is_active
# onig_get_encoding
# onig_get_options
# onig_get_case_fold_flag
# onig_get_syntax
# onig_set_default_syntax
# onig_copy_syntax
# Both arguments are pointers to OnigSyntaxType. The C prototype returns
# void, so restype is set to None instead of the ctypes default (int),
# which would yield a meaningless return value.
libonig.onig_copy_syntax.argtypes = [
    ctypes.POINTER(OnigSyntaxType),
    ctypes.POINTER(OnigSyntaxType),
]
libonig.onig_copy_syntax.restype = None
onig_copy_syntax = libonig.onig_copy_syntax
# onig_get_syntax_op
# onig_get_syntax_op2
# onig_get_syntax_behavior
# onig_get_syntax_options
# onig_set_syntax_op
# onig_set_syntax_op2
# onig_set_syntax_behavior
# onig_set_syntax_options
# onig_set_meta_char
# onig_copy_encoding
# onig_get_default_case_fold_flag
# onig_set_default_case_fold_flag
# onig_get_match_stack_limit_size
# onig_set_match_stack_limit_size
# onig_end
libonig.onig_end.argtypes = []
onig_end = libonig.onig_end
# onig_version
libonig.onig_version.restype = ctypes.c_char_p
libonig.onig_version.argtypes = []


def onig_version():
    """Return the library's version string, decoded from bytes with the
    default codec."""
    raw_version = libonig.onig_version()
    return raw_version.decode()
# onig_copyright
libonig.onig_copyright.restype = ctypes.c_char_p
libonig.onig_copyright.argtypes = []


def onig_copyright():
    """Return the library's copyright string, decoded from bytes with the
    default codec."""
    raw_copyright = libonig.onig_copyright()
    return raw_copyright.decode()

85
src/Onigmo/oniggnu.h Normal file
View File

@ -0,0 +1,85 @@
#ifndef ONIGGNU_H
#define ONIGGNU_H
/**********************************************************************
oniggnu.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "oniguruma.h"
#ifdef __cplusplus
extern "C" {
#endif
#define RE_MBCTYPE_ASCII 0
#define RE_MBCTYPE_EUC 1
#define RE_MBCTYPE_SJIS 2
#define RE_MBCTYPE_UTF8 3
/* GNU regex options */
/* RE_NREGS falls back to ONIG_NREGION unless the includer defined it already. */
#ifndef RE_NREGS
#define RE_NREGS ONIG_NREGION
#endif
/* Each RE_OPTION_* name below is a plain alias of an ONIG_OPTION_* value. */
#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
/* POSIXLINE is the bitwise OR of the MULTILINE and SINGLELINE options. */
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
/*
 * Prototypes of the re_* (GNU regex style) entry points.
 * Every declaration is prefixed with ONIG_EXTERN and wraps its parameter
 * list in P_(( ... )); both macros, as well as struct re_pattern_buffer and
 * struct re_registers, are expected to come from "oniguruma.h", which this
 * header includes (TODO confirm).
 */
ONIG_EXTERN
void re_mbcinit P_((int));
ONIG_EXTERN
int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN
int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN
void re_free_pattern P_((struct re_pattern_buffer*));
ONIG_EXTERN
int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
ONIG_EXTERN
int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
ONIG_EXTERN
int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
ONIG_EXTERN
void re_set_casetable P_((const char*));
ONIG_EXTERN
void re_free_registers P_((struct re_registers*));
ONIG_EXTERN
int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
#ifdef __cplusplus
}
#endif
#endif /* ONIGGNU_H */

169
src/Onigmo/onigposix.h Normal file
View File

@ -0,0 +1,169 @@
#ifndef ONIGPOSIX_H
#define ONIGPOSIX_H
/**********************************************************************
onigposix.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* options */
/* Bit flags: each value is a distinct power of two, so they can be OR-ed. */
#define REG_ICASE (1<<0)
#define REG_NEWLINE (1<<1)
#define REG_NOTBOL (1<<2)
#define REG_NOTEOL (1<<3)
#define REG_EXTENDED (1<<4) /* if not set, Basic Regular Expression */
#define REG_NOSUB (1<<5)
/* POSIX error codes */
#define REG_NOMATCH 1
#define REG_BADPAT 2
#define REG_ECOLLATE 3
#define REG_ECTYPE 4
#define REG_EESCAPE 5
#define REG_ESUBREG 6
#define REG_EBRACK 7
#define REG_EPAREN 8
#define REG_EBRACE 9
#define REG_BADBR 10
#define REG_ERANGE 11
#define REG_ESPACE 12
#define REG_BADRPT 13
/* extended error codes */
#define REG_EONIG_INTERNAL 14
#define REG_EONIG_BADWC 15
#define REG_EONIG_BADARG 16
#define REG_EONIG_THREAD 17
/* character encodings (for reg_set_encoding()) */
#define REG_POSIX_ENCODING_ASCII 0
#define REG_POSIX_ENCODING_EUC_JP 1
#define REG_POSIX_ENCODING_SJIS 2
#define REG_POSIX_ENCODING_UTF8 3
#define REG_POSIX_ENCODING_UTF16_BE 4
#define REG_POSIX_ENCODING_UTF16_LE 5
/* Offset type used by regmatch_t.
   NOTE(review): a plain int, so offsets above INT_MAX are not representable. */
typedef int regoff_t;
/* One match span; rm_so / rm_eo are presumably the start and end offsets
   as in POSIX <regex.h> -- TODO confirm against the implementation. */
typedef struct {
regoff_t rm_so;
regoff_t rm_eo;
} regmatch_t;
/* POSIX regex_t */
typedef struct {
void* onig; /* Oniguruma regex_t* */
size_t re_nsub;
int comp_options;
} regex_t;
/*
 * P_(args): prototype wrapper. Expands to the full parameter list on
 * compilers that define __STDC__ (or on _WIN32) and to an empty `()`
 * otherwise. An earlier definition of P_ is respected.
 */
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif
/*
 * ONIG_EXTERN: storage/linkage prefix for every public symbol.
 * With a non-GNU Windows compiler it selects dllexport when EXPORT is
 * defined and dllimport otherwise; everywhere else it is plain `extern`.
 * An earlier definition of ONIG_EXTERN is respected.
 */
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
#define ONIG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
/*
 * Fallback declarations for translation units that include this POSIX
 * header without oniguruma.h (detected through its include guard).
 *
 * The types below must describe the same objects as the library's own
 * definitions in oniguruma.h. OnigSyntaxType there ends with an
 * OnigMetaCharTableType member; the fallback used to omit it, so a
 * POSIX-only includer saw (and allocated) a shorter struct than the one
 * the library's syntax objects and onig_copy_syntax() are declared with.
 * The member and the two types it needs are therefore mirrored here.
 */
#ifndef ONIGURUMA_H
typedef unsigned int OnigOptionType;
typedef unsigned int OnigCodePoint;
/* meta character table; same members, in the same order, as in oniguruma.h */
typedef struct {
OnigCodePoint esc;
OnigCodePoint anychar;
OnigCodePoint anytime;
OnigCodePoint zero_or_one_time;
OnigCodePoint one_or_more_time;
OnigCodePoint anychar_anytime;
} OnigMetaCharTableType;
/* syntax */
typedef struct {
unsigned int op;
unsigned int op2;
unsigned int behavior;
OnigOptionType options; /* default option */
OnigMetaCharTableType meta_char_table; /* keeps the layout equal to oniguruma.h */
} OnigSyntaxType;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN const char* onig_version P_((void));
ONIG_EXTERN const char* onig_copyright P_((void));
#endif /* ONIGURUMA_H */
/* POSIX-named entry points. They operate on the wrapper regex_t declared
 * in this header; `options` takes the REG_* option bits and the int
 * results use the REG_* error codes.
 * NOTE(review): behaviour is defined by the implementation file, which is
 * not visible here - only the signatures are documented. */
ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
ONIG_EXTERN void regfree P_((regex_t* reg));
ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
/* extended API */
/* `enc` is one of the REG_POSIX_ENCODING_* selectors. */
ONIG_EXTERN void reg_set_encoding P_((int enc));
/* The name is passed as a [name, name_end) byte range; `nums` is an out
 * parameter receiving a pointer to ints. */
ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums));
/* NOTE(review): `arg` is presumably forwarded as the callback's trailing
 * void* parameter - confirm against the implementation. */
ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
#ifdef __cplusplus
}
#endif
#endif /* ONIGPOSIX_H */

866
src/Onigmo/oniguruma.h Normal file
View File

@ -0,0 +1,866 @@
#ifndef ONIGURUMA_H
#define ONIGURUMA_H
/**********************************************************************
oniguruma.h - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifdef __cplusplus
extern "C" {
#endif
/* Marker macro plus the library version, 5.13.5 (major.minor.teeny). */
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 5
#define ONIGURUMA_VERSION_MINOR 13
#define ONIGURUMA_VERSION_TEENY 5
/* C++ always has prototypes and variadic prototypes: force both feature
 * macros on unless the includer already defined them. */
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
# define HAVE_PROTOTYPES 1
# endif
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
/* Having <stdarg.h> (HAVE_STDARG_H) is treated as having variadic
 * prototypes; HAVE_STDARG_PROTOTYPES drives the PV_() wrapper. */
#ifdef HAVE_STDARG_H
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
/* P_(args): prototype wrapper for fixed-argument functions. Expands to the
 * parameter list when __STDC__ or _WIN32 is defined, else to `()`.
 * An earlier definition is respected. */
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif
/* PV_(args): same idea for variadic functions, keyed on
 * HAVE_STDARG_PROTOTYPES instead of __STDC__. */
#ifndef PV_
#ifdef HAVE_STDARG_PROTOTYPES
# define PV_(args) args
#else
# define PV_(args) ()
#endif
#endif
/* ONIG_EXTERN: linkage prefix for public symbols. With a non-GNU Windows
 * compiler it is dllexport when EXPORT or RUBY_EXPORT is defined and
 * dllimport otherwise; on every other toolchain it is plain `extern`.
 * An earlier definition is respected. */
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
#define ONIG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
#include <stddef.h> /* for size_t */
/* PART: character encoding */
/* Short alias UChar for OnigUChar. It is a macro, so it affects every use
 * of the identifier UChar after this header; define
 * ONIG_ESCAPE_UCHAR_COLLISION before including to suppress it. */
#ifndef ONIG_ESCAPE_UCHAR_COLLISION
#define UChar OnigUChar
#endif
/* Basic scalar types of the API. */
typedef unsigned char OnigUChar; /* byte type behind the pattern/string pointers */
typedef unsigned int OnigCodePoint; /* a single character code */
typedef unsigned int OnigCtype; /* character type id (ONIGENC_CTYPE_*) */
typedef size_t OnigDistance; /* unsigned length/distance */
typedef ptrdiff_t OnigPosition; /* signed position; also carries negative result codes */
/* All-ones OnigDistance, i.e. the largest representable value. The whole
 * replacement list is parenthesized so the macro behaves as a single
 * operand wherever it is expanded. */
#define ONIG_INFINITE_DISTANCE (~((OnigDistance )0))
typedef unsigned int OnigCaseFoldType; /* case fold flag */
/* Variable holding the default flag set; see ONIGENC_CASE_FOLD_DEFAULT. */
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
/* The two flags below are disabled (kept only as commented-out text). */
/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
/* ONIGENC_CASE_FOLD_MIN is an alias of the multi-char flag alone. */
#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
/* Expands to the variable above, so the "default" is read at each use. */
#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
/* Capacity of OnigCaseFoldCodeItem.code[] */
#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
/* 13 => Unicode:0x1ffc */
/* code range */
/*
 * Accessors for a flat code-range table laid out as
 *   { num, from_0, to_0, from_1, to_1, ... }
 * so element 0 is the count and pair i lives at indices 2*i+1 and 2*i+2.
 *
 * `range` and the whole expansion are parenthesized so that any pointer
 * expression (for example `table + 1`) can be passed; without the
 * parentheses the subscript attached to the last operand of the argument.
 * The FROM/TO results are still lvalues.
 */
#define ONIGENC_CODE_RANGE_NUM(range) ((int )(range)[0])
#define ONIGENC_CODE_RANGE_FROM(range,i) ((range)[((i)*2) + 1])
#define ONIGENC_CODE_RANGE_TO(range,i) ((range)[((i)*2) + 2])
/* One case-fold alternative, as written into the `acs[]` array of the
 * get_case_fold_codes_by_str encoding callback. */
typedef struct {
int byte_len; /* argument(original) character(s) byte length */
int code_len; /* number of code */
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
} OnigCaseFoldCodeItem;
/* Per-syntax table of meta characters, one code point per role. It is
 * embedded in OnigSyntaxType as `meta_char_table`.
 * NOTE(review): the roles presumably correspond to the ONIG_META_CHAR_*
 * specifiers (same names, same order) - confirm. */
typedef struct {
OnigCodePoint esc;
OnigCodePoint anychar;
OnigCodePoint anytime;
OnigCodePoint zero_or_one_time;
OnigCodePoint one_or_more_time;
OnigCodePoint anychar_anytime;
} OnigMetaCharTableType;
/* Callback type taken by the apply_all_case_fold encoding callback; `arg`
 * is an opaque pointer supplied alongside the callback. */
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
/* Description of one character encoding: a table of function pointers plus
 * a name, the min/max encoded length and a flags word. The ONIGENC_* macros
 * of this header dispatch through these members. */
typedef struct OnigEncodingTypeST {
int (*mbc_enc_len)(const OnigUChar* p);
const char* name;
int max_enc_len;
int min_enc_len;
int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
int (*code_to_mbclen)(OnigCodePoint code);
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]);
/* the only callback that receives the encoding object itself */
int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype);
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]);
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
unsigned int flags; /* ONIGENC_FLAG_* bits; tested by ONIGENC_IS_UNICODE() */
} OnigEncodingType;
/* Handle type used throughout the API: a pointer to an encoding object. */
typedef OnigEncodingType* OnigEncoding;
/* Built-in encoding objects (declared here, defined elsewhere in the
 * library). The ISO-8859 list has no part 12. */
ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
ONIG_EXTERN OnigEncodingType OnigEncodingCP932;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
ONIG_EXTERN OnigEncodingType OnigEncodingCP1251;
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
/* OnigEncoding handles: each macro is the address of the matching object
 * above, so it can be passed wherever an OnigEncoding is expected. */
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE)
#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE)
#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE)
#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE)
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
#define ONIG_ENCODING_CP932 (&OnigEncodingCP932)
#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251)
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
/* Null handle meaning "no encoding"; tested by ONIGENC_IS_UNDEF(). */
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
/* work size */
/* NOTE(review): presumably the buffer sizes callers must provide for the
 * code_to_mbc and mbc_case_fold callbacks - confirm. */
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
/* 18: 6(max-byte) * 3(case-fold chars) */
/* character types */
/* Consecutive ids 0..14 used as OnigCtype values, e.g. as the `ctype`
 * argument of ONIGENC_IS_CODE_CTYPE(). */
#define ONIGENC_CTYPE_NEWLINE 0
#define ONIGENC_CTYPE_ALPHA 1
#define ONIGENC_CTYPE_BLANK 2
#define ONIGENC_CTYPE_CNTRL 3
#define ONIGENC_CTYPE_DIGIT 4
#define ONIGENC_CTYPE_GRAPH 5
#define ONIGENC_CTYPE_LOWER 6
#define ONIGENC_CTYPE_PRINT 7
#define ONIGENC_CTYPE_PUNCT 8
#define ONIGENC_CTYPE_SPACE 9
#define ONIGENC_CTYPE_UPPER 10
#define ONIGENC_CTYPE_XDIGIT 11
#define ONIGENC_CTYPE_WORD 12
#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
#define ONIGENC_CTYPE_ASCII 14
/* highest id in the list above */
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
/* flags */
/* Values for OnigEncodingType.flags. */
#define ONIGENC_FLAG_NONE 0U
#define ONIGENC_FLAG_UNICODE 1U
/* Three-argument spelling of ONIGENC_MBC_ENC_LEN(); `end` is accepted but
 * not used in the expansion. */
#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p)
/* True when the handle is the null ONIG_ENCODING_UNDEF. */
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
/* True when the encoding's maximum encoded length is one byte. */
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
/* True when the character starting at p is not exactly one byte long. */
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
/* Byte at p is below 128. Assumes p points at unsigned bytes (OnigUChar);
 * through a pointer to signed char, bytes >= 0x80 would read as negative
 * and also satisfy the test. */
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
/* Decode the character at s, then test it for the WORD type using the
 * encoding's own classifier. */
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
/* Same, but classified by onigenc_ascii_is_code_ctype(), which is not
 * declared in the visible part of this header. */
#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
onigenc_ascii_is_code_ctype( \
ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD)
/* Non-zero when the encoding's flags contain ONIGENC_FLAG_UNICODE. */
#define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
#define ONIGENC_NAME(enc) ((enc)->name)
/* Calls the encoding's mbc_case_fold callback. `pp` is converted to
 * `const OnigUChar**` so callers may pass a pointer to a non-const cursor.
 * `pp` is parenthesized so the cast applies to the whole argument
 * expression rather than only to its first operand. */
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
(enc)->mbc_case_fold(flag,(const OnigUChar** )(pp),end,buf)
/* Thin dispatch wrappers: each of the macros below forwards its arguments
 * to the same-named member of the encoding object `enc` (or, for
 * ONIGENC_STEP_BACK, to the function onigenc_step_back()). */
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
(enc)->apply_all_case_fold(case_fold_flag,f,arg)
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
(enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
/* Plain field reads of the encoding's max/min encoded length. */
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
/* alias of ONIGENC_MBC_MAXLEN */
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
/* Note: `enc` appears twice in the expansion (object and first argument),
 * so an argument with side effects would be evaluated twice. */
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
(enc)->property_name_to_ctype(enc,p,end)
/* Ask the encoding whether code point `code` belongs to character type
 * `ctype` (one of the ONIGENC_CTYPE_* ids). */
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
/* Convenience forms: one macro per character type, each expanding to
 * ONIGENC_IS_CODE_CTYPE() with the matching ONIGENC_CTYPE_* id. */
#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
#define ONIGENC_IS_CODE_LOWER(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
#define ONIGENC_IS_CODE_UPPER(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
#define ONIGENC_IS_CODE_SPACE(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
#define ONIGENC_IS_CODE_BLANK(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
#define ONIGENC_IS_CODE_WORD(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
/* Forwards to the encoding's get_ctype_code_range callback; `sbout` and
 * `ranges` are output parameters of that callback. */
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
(enc)->get_ctype_code_range(ctype,sbout,ranges)
/* Function behind the ONIGENC_STEP_BACK() macro. */
ONIG_EXTERN
OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
/* encoding API */
/* NOTE(review): only the signatures are visible here; the behaviour of
 * these functions is defined in the encoding implementation. */
ONIG_EXTERN
int onigenc_init P_((void));
ONIG_EXTERN
int onigenc_set_default_encoding P_((OnigEncoding enc));
ONIG_EXTERN
OnigEncoding onigenc_get_default_encoding P_((void));
ONIG_EXTERN
void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
/* Character-boundary helpers. All take the string start and a position `s`
 * inside it and return a non-const pointer although the inputs are const. */
ONIG_EXTERN
OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
ONIG_EXTERN
OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
/* Length helpers: the first works on a [p, end) range, the *_null variants
 * take only a start pointer. All return int. */
ONIG_EXTERN
int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
ONIG_EXTERN
int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
/* PART: regular expression */
/* config parameters */
/* Compile-time limits of the library. */
#define ONIG_NREGION 10
#define ONIG_MAX_BACKREF_NUM 1000
#define ONIG_MAX_REPEAT_NUM 100000
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
/* constants */
/* NOTE(review): presumably the buffer size expected by
 * onig_error_code_to_str() - confirm against its implementation. */
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
/*
 * Option word: an unsigned bit set. Every named option below owns exactly
 * one bit, spelled out as an explicit bit position (0..15); two names are
 * aliases of another option.
 */
typedef unsigned int OnigOptionType;
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
/* options (compile time) */
#define ONIG_OPTION_NONE 0U
#define ONIG_OPTION_IGNORECASE (1U << 0)
#define ONIG_OPTION_EXTEND (1U << 1)
#define ONIG_OPTION_MULTILINE (1U << 2)
#define ONIG_OPTION_DOTALL ONIG_OPTION_MULTILINE /* alias */
#define ONIG_OPTION_SINGLELINE (1U << 3)
#define ONIG_OPTION_FIND_LONGEST (1U << 4)
#define ONIG_OPTION_FIND_NOT_EMPTY (1U << 5)
#define ONIG_OPTION_NEGATE_SINGLELINE (1U << 6)
#define ONIG_OPTION_DONT_CAPTURE_GROUP (1U << 7)
#define ONIG_OPTION_CAPTURE_GROUP (1U << 8)
/* options (search time) */
#define ONIG_OPTION_NOTBOL (1U << 9)
#define ONIG_OPTION_NOTEOL (1U << 10)
#define ONIG_OPTION_POSIX_REGION (1U << 11)
/* options (ctype range) */
#define ONIG_OPTION_ASCII_RANGE (1U << 12)
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE (1U << 13)
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (1U << 14)
/* options (newline) */
#define ONIG_OPTION_NEWLINE_CRLF (1U << 15)
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit: highest bit in use */
/* Set, clear and test bits. ON and OFF assign to `options`, so it must be
 * a modifiable lvalue; IS_OPTION_ON yields the masked bits (non-zero when
 * any requested bit is set), not a normalized 0/1. */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
/* syntax */
/* Description of one regex dialect: three flag words, the default options
 * and the dialect's meta character table. */
typedef struct {
unsigned int op; /* NOTE(review): presumably ONIG_SYN_OP_* bits - confirm */
unsigned int op2; /* NOTE(review): presumably ONIG_SYN_OP2_* bits - confirm */
unsigned int behavior; /* NOTE(review): presumably the remaining ONIG_SYN_* bits - confirm */
OnigOptionType options; /* default option */
OnigMetaCharTableType meta_char_table;
} OnigSyntaxType;
/* Built-in syntax objects (declared here, defined elsewhere). */
ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl58;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl58_NG;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPython;
/* predefined syntaxes (see regsyntax.c) */
/* Each macro is the address of the matching object above. */
#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL58 (&OnigSyntaxPerl58)
#define ONIG_SYNTAX_PERL58_NG (&OnigSyntaxPerl58_NG)
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
#define ONIG_SYNTAX_PYTHON (&OnigSyntaxPython)
/* default syntax */
/* ONIG_SYNTAX_DEFAULT expands to the pointer variable, so it is read at
 * each use rather than fixed at compile time. */
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
/* syntax (operators) */
/* First operator flag word; all 32 bits (0..31) are assigned. The trailing
 * comment on each line shows the pattern notation the flag refers to. */
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<, \> */
#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{OOO} */ /* NOTIMPL */
/* Second operator flag word. Bit 18 is unassigned (its former flag is kept
 * below as commented-out text); entries marked NOTIMPL are declared but,
 * per their own comment, not implemented. */
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsxadlu), (?-imsx), (?^imsxalu) */
#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imxadu), (?-imx) */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK (1U<<21) /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER (1U<<22) /* \X as (?>\P{M}\p{M}*) */
#define ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE (1U<<23) /* \v, \V -- Perl */ /* NOTIMPL */
#define ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE (1U<<24) /* \h, \H -- Perl */ /* NOTIMPL */
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<25) /* \K */
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF (1U<<26) /* \g{name}, \g{n} */
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL (1U<<27) /* (?&name), (?n), (?R), (?0) */
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET (1U<<28) /* (?|...) */ /* NOTIMPL */
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION (1U<<29) /* (?(cond)yes...|no...) */
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP (1U<<30) /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
#define ONIG_SYN_OP2_OPTION_JAVA (1U<<31) /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
/* syntax (behavior) */
/* Behaviour flag word, in three bit groups: general behaviour (bits 0..10
 * plus bit 31), character-class behaviour (bits 20..23) and warnings
 * (bits 24..25). */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL (1U<<10) /* (?<x>)(?<x>)(?&x) */
/* syntax (behavior) in char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
/* meta character specifiers (onig_set_meta_char()) */
/* Consecutive selectors 0..5; their names and order mirror the members of
 * OnigMetaCharTableType. */
#define ONIG_META_CHAR_ESCAPE 0
#define ONIG_META_CHAR_ANYCHAR 1
#define ONIG_META_CHAR_ANYTIME 2
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
/* NOTE(review): presumably the code point value meaning "this meta
 * character is disabled" - confirm. */
#define ONIG_INEFFECTIVE_META_CHAR 0
/* error codes */
/* True for codes in the half-open range (-1000, -100], i.e. -100 down to
 * -999. `ecode` is evaluated twice. */
#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
/* normal return */
#define ONIG_NORMAL 0
#define ONIG_MISMATCH -1
#define ONIG_NO_SUPPORT_CONFIG -2
/* internal error */
/* All of these lie above -100, so ONIG_IS_PATTERN_ERROR() is false for
 * them. */
#define ONIGERR_MEMORY -5
#define ONIGERR_TYPE_BUG -6
#define ONIGERR_PARSER_BUG -11
#define ONIGERR_STACK_BUG -12
#define ONIGERR_UNDEFINED_BYTECODE -13
#define ONIGERR_UNEXPECTED_BYTECODE -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
/* general error */
#define ONIGERR_INVALID_ARGUMENT -30
/* syntax error */
/* Codes -100..-124; the values -107 and -120 are not assigned. */
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
#define ONIGERR_EMPTY_CHAR_CLASS -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
#define ONIGERR_END_PATTERN_AT_ESCAPE -104
#define ONIGERR_END_PATTERN_AT_META -105
#define ONIGERR_END_PATTERN_AT_CONTROL -106
#define ONIGERR_META_CODE_SYNTAX -108
#define ONIGERR_CONTROL_CODE_SYNTAX -109
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
#define ONIGERR_NESTED_REPEAT_OPERATOR -115
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
#define ONIGERR_END_PATTERN_IN_GROUP -118
#define ONIGERR_UNDEFINED_GROUP_OPTION -119
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
#define ONIGERR_INVALID_CONDITION_PATTERN -124
/* values error (syntax error) */
/* Codes in the -200 and -400 ranges; the values -211 and -213 are not
 * assigned. */
#define ONIGERR_TOO_BIG_NUMBER -200
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
#define ONIGERR_INVALID_BACKREF -208
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
#define ONIGERR_TOO_SHORT_DIGITS -210
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
#define ONIGERR_EMPTY_GROUP_NAME -214
#define ONIGERR_INVALID_GROUP_NAME -215
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
/* The next two names share the value -400, so they cannot be told apart
 * at run time and cannot both appear as labels of one switch. */
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
/* errors related to thread */
/* -1001 is below the ONIG_IS_PATTERN_ERROR() range, so that macro is false
 * for it. */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
/* NOTE(review): this macro reads `(r)->list`, but none of the structs
 * declared in the visible part of this header has a member named `list`,
 * so it cannot be used with them as written - confirm whether it is still
 * meant to be usable. It also applies no lower bound to `i`. */
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
/* Node of the capture history tree; the tree root is reachable through
 * re_registers.history_root. */
typedef struct OnigCaptureTreeNodeStruct {
int group; /* group number */
OnigPosition beg;
OnigPosition end;
int allocated; /* NOTE(review): presumably the capacity of childs[] - confirm */
int num_childs;
struct OnigCaptureTreeNodeStruct** childs;
} OnigCaptureTreeNode;
/* match result region type */
/* Two parallel OnigPosition arrays (beg/end) plus their bookkeeping
 * counters and an optional capture history tree. */
struct re_registers {
int allocated;
int num_regs;
OnigPosition* beg;
OnigPosition* end;
/* extended */
OnigCaptureTreeNode* history_root; /* capture history tree root */
};
/* capture tree traverse */
/* FIRST and LAST are distinct bits; BOTH is their bitwise OR (value 3). */
#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
/* NOTE(review): presumably the beg/end value stored for a group that did
 * not take part in the match - confirm. */
#define ONIG_REGION_NOTPOS -1
/* Public name of the region type. */
typedef struct re_registers OnigRegion;
/* Extra error detail; a pointer to one is taken as the `einfo` argument of
 * the onig_new() family. `par`/`par_end` delimit a byte range in encoding
 * `enc`. */
typedef struct {
OnigEncoding enc;
OnigUChar* par;
OnigUChar* par_end;
} OnigErrorInfo;
/* Lower/upper bound pair; OnigRegexType keeps an array of these in its
 * repeat_range member. */
typedef struct {
int lower;
int upper;
} OnigRepeatRange;
/* Warning callback type: receives the message as a C string. */
typedef void (*OnigWarnFunc) P_((const char* s));
/* NOTE(review): declared with plain `extern` while the other public
 * symbols in this header use ONIG_EXTERN - confirm this is intentional
 * for Windows DLL builds. */
extern void onig_null_warn P_((const char* s));
#define ONIG_NULL_WARN onig_null_warn
/* Size of the per-byte `map` table in OnigRegexType. */
#define ONIG_CHAR_TABLE_SIZE 256
/* regex_t state */
#define ONIG_STATE_NORMAL 0
#define ONIG_STATE_SEARCHING 1
#define ONIG_STATE_COMPILING -1
#define ONIG_STATE_MODIFY -2
/* Normalizes the `state` member: every positive value reads as
 * ONIG_STATE_SEARCHING, zero and negative values are returned unchanged.
 * `reg` is evaluated twice. */
#define ONIG_STATE(reg) \
((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
/* Compiled regular expression object. The members are public only because
 * the struct is declared in this header; the API works with OnigRegex
 * handles (pointers to this type). */
typedef struct re_pattern_buffer {
/* common members of BBuf(bytes-buffer) */
unsigned char* p; /* compiled pattern */
unsigned int used; /* used space for p */
unsigned int alloc; /* allocated space for p */
int state; /* normal, searching, compiling */
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
int num_comb_exp_check; /* combination explosion check */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */
unsigned int bt_mem_end; /* need backtrack flag */
int stack_pop_level;
int repeat_range_alloc;
OnigRepeatRange* repeat_range;
OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
OnigCaseFoldType case_fold_flag;
void* name_table;
/* optimization info (string search, char-map and anchors) */
int optimize; /* optimize flag */
int threshold_len; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
int *int_map; /* BM skip for exact_len > 255 */
int *int_map_backward; /* BM skip for backward search */
OnigDistance dmin; /* min-distance of exact or map */
OnigDistance dmax; /* max-distance of exact or map */
/* regex_t link chain */
struct re_pattern_buffer* chain; /* escape compile-conflict */
} OnigRegexType;
/* Handle type used by the native API. */
typedef OnigRegexType* OnigRegex;
/* Short name regex_t for the same struct. Define
 * ONIG_ESCAPE_REGEX_T_COLLISION before including this header when another
 * regex_t (such as the POSIX wrapper type) must be visible instead. */
#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
typedef OnigRegexType regex_t;
#endif
/* Bundle of compile parameters; onig_new_deluxe() and
   onig_recompile_deluxe() take a pointer to it.
   NOTE(review): num_of_elements presumably counts the members that follow
   it, for struct versioning -- confirm before relying on it. */
typedef struct {
int num_of_elements;
OnigEncoding pattern_enc;
OnigEncoding target_enc;
OnigSyntaxType* syntax;
OnigOptionType option;
OnigCaseFoldType case_fold_flag;
} OnigCompileInfo;
/* Oniguruma Native API */
ONIG_EXTERN
int onig_init P_((void));
ONIG_EXTERN
int onig_error_code_to_str PV_((OnigUChar* s, OnigPosition err_code, ...));
ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
ONIG_EXTERN
int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
void onig_free P_((OnigRegex));
ONIG_EXTERN
void onig_free_body P_((OnigRegex));
ONIG_EXTERN
int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
OnigPosition onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigPosition onig_search_gpos P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigPosition onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
void onig_region_init P_((OnigRegion* region));
ONIG_EXTERN
void onig_region_free P_((OnigRegion* region, int free_self));
ONIG_EXTERN
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
ONIG_EXTERN
void onig_region_clear P_((OnigRegion* region));
ONIG_EXTERN
int onig_region_resize P_((OnigRegion* region, int n));
ONIG_EXTERN
int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
ONIG_EXTERN
int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
ONIG_EXTERN
int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
ONIG_EXTERN
int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
ONIG_EXTERN
int onig_number_of_names P_((OnigRegex reg));
ONIG_EXTERN
int onig_number_of_captures P_((OnigRegex reg));
ONIG_EXTERN
int onig_number_of_capture_histories P_((OnigRegex reg));
ONIG_EXTERN
OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
ONIG_EXTERN
int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*), void* arg));
ONIG_EXTERN
int onig_noname_group_capture_is_active P_((OnigRegex reg));
ONIG_EXTERN
OnigEncoding onig_get_encoding P_((OnigRegex reg));
ONIG_EXTERN
OnigOptionType onig_get_options P_((OnigRegex reg));
ONIG_EXTERN
OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg));
ONIG_EXTERN
OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
ONIG_EXTERN
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_copy_syntax P_((OnigSyntaxType* to, const OnigSyntaxType* from));
ONIG_EXTERN
unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
ONIG_EXTERN
unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
ONIG_EXTERN
unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
ONIG_EXTERN
OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
ONIG_EXTERN
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
ONIG_EXTERN
void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
ONIG_EXTERN
void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
ONIG_EXTERN
OnigCaseFoldType onig_get_default_case_fold_flag P_((void));
ONIG_EXTERN
int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag));
ONIG_EXTERN
unsigned int onig_get_match_stack_limit_size P_((void));
ONIG_EXTERN
int onig_set_match_stack_limit_size P_((unsigned int size));
ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
const char* onig_version P_((void));
ONIG_EXTERN
const char* onig_copyright P_((void));
#ifdef __cplusplus
}
#endif
#endif /* ONIGURUMA_H */

View File

@ -0,0 +1,14 @@
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@
datarootdir=@datarootdir@
datadir=@datadir@
Name: oniguruma
Description: Regular expression library
Version: @PACKAGE_VERSION@
Requires:
Libs: -L${libdir} -lonig
Cflags: -I${includedir}

6677
src/Onigmo/regcomp.c Normal file

File diff suppressed because it is too large Load Diff

933
src/Onigmo/regenc.c Normal file
View File

@ -0,0 +1,933 @@
/**********************************************************************
regenc.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/* Global default character encoding, initialized to
   ONIG_ENCODING_INIT_DEFAULT; read and replaced through
   onigenc_get_default_encoding() / onigenc_set_default_encoding(). */
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
/* Encoding-layer initialization hook.  There is nothing to set up, so it
   always reports success (0). */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
/* Return the current value of the global OnigEncDefaultCharEncoding. */
extern OnigEncoding
onigenc_get_default_encoding(void)
{
  OnigEncoding current = OnigEncDefaultCharEncoding;

  return current;
}
/* Replace the global OnigEncDefaultCharEncoding with `enc`.
   Always returns 0. */
extern int
onigenc_set_default_encoding(OnigEncoding enc)
{
  OnigEncDefaultCharEncoding = enc;

  return 0;
}
/* Round `s` up to a character head.
 *
 * `s` is first left-adjusted with ONIGENC_LEFT_ADJUST_CHAR_HEAD.  If that
 * head is `s` itself (or not before it), it is returned as is; otherwise
 * `s` is inside a character and the position just after that character
 * (head + enclen) is returned.
 */
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  if (head >= s)
    return head;
  return head + enclen(enc, head);
}
/* Same rounding as onigenc_get_right_adjust_char_head(), additionally
 * reporting the preceding head through `prev` when `prev` is non-NULL.
 *
 * When `s` is inside a character, *prev receives that character's head
 * and the position after the character is returned.  When `s` is already
 * a head, *prev is set to NULL (no previous head is computed) and the
 * head is returned.
 */
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
    const UChar* start, const UChar* s, const UChar** prev)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
  const int inside_char = (head < s);

  if (prev)
    *prev = inside_char ? (const UChar* )head : (const UChar* )NULL;
  if (inside_char)
    head += enclen(enc, head);
  return head;
}
/* Return the head of the character containing byte s-1, or NULL when `s`
   is at or before `start` (there is no previous character). */
extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
  return (s > start)
    ? ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1)
    : (UChar* )NULL;
}
/* Move `s` back by `n` characters without going before `start`.
 *
 * Each step left-adjusts byte (position - 1) to its character head.
 * Returns the resulting position, or NULL if `start` is reached while
 * steps remain.  A NULL `s` or n <= 0 returns `s` unchanged.
 */
extern UChar*
onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
{
  const UChar* cur = s;
  int remaining;

  for (remaining = n; ONIG_IS_NOT_NULL(cur) && remaining > 0; remaining--) {
    if (cur <= start)
      return (UChar* )NULL;
    cur = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, cur - 1);
  }
  return (UChar* )cur;
}
/* Advance `p` by `n` characters within [p, end].
 *
 * Returns the position after the n-th character, or NULL when that
 * position would lie beyond `end`.  With n <= 0 the start position is
 * returned (NULL if it is already beyond `end`).
 */
extern UChar*
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
{
  UChar* q = (UChar* )p;

  while (n-- > 0) {
    /* A step is still required but no character starts before `end`:
       the result can only land past `end`, so fail here rather than
       asking the encoding for the length of a byte outside the buffer. */
    if (q >= end) return NULL;
    q += ONIGENC_MBC_ENC_LEN(enc, q);
  }
  return (q <= end ? q : NULL);
}
/* Count the characters in [p, end) by repeatedly skipping
   ONIGENC_MBC_ENC_LEN bytes until `end` is reached or passed. */
extern int
onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
{
  UChar* cur = (UChar* )p;
  int count;

  for (count = 0; cur < end; count++)
    cur += ONIGENC_MBC_ENC_LEN(enc, cur);
  return count;
}
/* Count the characters of a NUL-terminated string.
 *
 * The terminator is a run of ONIGENC_MBC_MINLEN(enc) zero bytes: when a
 * zero byte is found at a character head, the following (minlen - 1)
 * bytes must also be zero for it to end the string.  Otherwise the
 * character is counted and skipped like any other.
 */
extern int
onigenc_strlen_null(OnigEncoding enc, const UChar* s)
{
  UChar* cur = (UChar* )s;
  int count;

  for (count = 0; ; count++) {
    if (*cur == '\0') {
      UChar* probe = cur + 1;
      int width = ONIGENC_MBC_MINLEN(enc);

      /* consume one unit of width for every further zero byte */
      while (width > 1 && *probe == '\0') {
        probe++;
        width--;
      }
      if (width == 1) return count;
    }
    cur += ONIGENC_MBC_ENC_LEN(enc, cur);
  }
}
/* Byte length of a NUL-terminated string, excluding the terminator.
 *
 * The terminator is a run of ONIGENC_MBC_MINLEN(enc) zero bytes starting
 * at a character head; the scan advances one character at a time with
 * ONIGENC_MBC_ENC_LEN until such a run is found.
 */
extern int
onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
{
  UChar* const base = (UChar* )s;
  UChar* cur = base;

  for (;;) {
    if (*cur == '\0') {
      UChar* probe = cur + 1;
      int width = ONIGENC_MBC_MINLEN(enc);

      /* consume one unit of width for every further zero byte */
      while (width > 1 && *probe == '\0') {
        probe++;
        width--;
      }
      if (width == 1) return (int )(cur - base);
    }
    cur += ONIGENC_MBC_ENC_LEN(enc, cur);
  }
}
/* ASCII lower-casing table indexed by byte value (256 entries).
   Every byte maps to itself except '\101'..'\132' ('A'..'Z'), which map
   to '\141'..'\172' ('a'..'z'). */
const UChar OnigEncAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
/* ASCII upper-casing table indexed by byte value; compiled only when
   USE_UPPER_CASE_TABLE is defined.  Every byte maps to itself except
   '\141'..'\172' ('a'..'z'), which map to '\101'..'\132' ('A'..'Z'). */
#ifdef USE_UPPER_CASE_TABLE
const UChar OnigEncAsciiToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
/* Per-byte character-class flag words, indexed by byte value.
   Entries 0..127 carry class bits; entries 128..255 are all zero, so no
   non-ASCII byte belongs to any class through this table.
   NOTE(review): the meaning of the individual bits is defined by the
   ONIGENC_CTYPE_* constants outside this file section -- confirm there
   before editing any value. */
const unsigned short OnigEncAsciiCtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
/* ISO-8859-1 lower-casing table indexed by byte value.
   In addition to 'A'..'Z' -> 'a'..'z', bytes '\300'..'\336' map to
   '\340'..'\376', except '\327' which maps to itself.  '\337' and every
   other byte map to themselves. */
const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/* ISO-8859-1 upper-casing table indexed by byte value; compiled only when
   USE_UPPER_CASE_TABLE is defined.  In addition to 'a'..'z' -> 'A'..'Z',
   bytes '\340'..'\376' map to '\300'..'\336', except '\367' which maps to
   itself.  '\377' and every other byte map to themselves. */
#ifdef USE_UPPER_CASE_TABLE
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
/* Obsolete entry point, retained as a no-op: the `table` argument is
   ignored and nothing is changed. */
extern void
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
  /* intentionally empty -- obsoleted */
}
/* Function form of the ONIGENC_LEFT_ADJUST_CHAR_HEAD macro: returns
   whatever the macro yields for (enc, start, s). */
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  return head;
}
/* The 26 ASCII case pairs as { from, to } = { upper, lower }:
   0x41..0x5a ('A'..'Z') paired with 0x61..0x7a ('a'..'z').
   Iterated by onigenc_ascii_apply_all_case_fold(). */
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
{ 0x41, 0x61 },
{ 0x42, 0x62 },
{ 0x43, 0x63 },
{ 0x44, 0x64 },
{ 0x45, 0x65 },
{ 0x46, 0x66 },
{ 0x47, 0x67 },
{ 0x48, 0x68 },
{ 0x49, 0x69 },
{ 0x4a, 0x6a },
{ 0x4b, 0x6b },
{ 0x4c, 0x6c },
{ 0x4d, 0x6d },
{ 0x4e, 0x6e },
{ 0x4f, 0x6f },
{ 0x50, 0x70 },
{ 0x51, 0x71 },
{ 0x52, 0x72 },
{ 0x53, 0x73 },
{ 0x54, 0x74 },
{ 0x55, 0x75 },
{ 0x56, 0x76 },
{ 0x57, 0x77 },
{ 0x58, 0x78 },
{ 0x59, 0x79 },
{ 0x5a, 0x7a }
};
/* Invoke `f` for every ASCII case pair in OnigAsciiLowerMap, in both
 * directions: first (from -> to), then (to -> from), each with a single
 * target code point.
 *
 * Stops at, and returns, the first non-zero value produced by `f`;
 * returns 0 once all pairs have been reported.  `flag` is unused.
 */
extern int
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
    OnigApplyAllCaseFoldFunc f, void* arg)
{
  const int pair_count =
    (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
  OnigCodePoint target;
  int idx, status;

  for (idx = 0; idx < pair_count; idx++) {
    const OnigPairCaseFoldCodes* pair = &OnigAsciiLowerMap[idx];

    target = pair->to;
    status = (*f)(pair->from, &target, 1, arg);
    if (status != 0) return status;

    target = pair->from;
    status = (*f)(pair->to, &target, 1, arg);
    if (status != 0) return status;
  }
  return 0;
}
/* Report the case-fold alternative of the byte at `p` (ASCII only).
 *
 * For 'A'..'Z' (0x41..0x5a) items[0] receives the byte + 0x20; for
 * 'a'..'z' (0x61..0x7a) it receives the byte - 0x20.  In both cases
 * byte_len and code_len are 1 and the function returns 1.  Any other
 * byte yields 0 and leaves `items` untouched.  `flag` and `end` are
 * unused.
 */
extern int
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
    const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
    OnigCaseFoldCodeItem items[])
{
  const OnigUChar ch = *p;
  int delta;

  if (0x41 <= ch && ch <= 0x5a)
    delta = 0x20;
  else if (0x61 <= ch && ch <= 0x7a)
    delta = -0x20;
  else
    return 0;

  items[0].byte_len = 1;
  items[0].code_len = 1;
  items[0].code[0] = (OnigCodePoint )(ch + delta);
  return 1;
}
/* Report the single fold 0xdf -> { 0x73, 0x73 } (two code points, "ss")
   to `f` and return its result.  `flag` is unused. */
static int
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
    OnigApplyAllCaseFoldFunc f, void* arg)
{
  static OnigCodePoint ss_fold[] = { 0x73, 0x73 };
  const OnigCodePoint from = (OnigCodePoint )0xdf;

  return (*f)(from, ss_fold, 2, arg);
}
/* Report every case-fold pair of an encoding described by `map`.
 *
 * Order: the ASCII pairs first, then each of the `map_size` entries of
 * `map` in both directions (from -> to, to -> from), and finally -- only
 * when `ess_tsett_flag` is non-zero -- the 0xdf -> "ss" fold.
 * The first non-zero value returned by `f` aborts the walk and is
 * returned; otherwise the result is 0 (or the result of the final
 * "ss" report).
 */
extern int
onigenc_apply_all_case_fold_with_map(int map_size,
    const OnigPairCaseFoldCodes map[],
    int ess_tsett_flag, OnigCaseFoldType flag,
    OnigApplyAllCaseFoldFunc f, void* arg)
{
  const OnigPairCaseFoldCodes* pair;
  const OnigPairCaseFoldCodes* const map_end = map + map_size;
  OnigCodePoint target;
  int status;

  status = onigenc_ascii_apply_all_case_fold(flag, f, arg);
  if (status != 0) return status;

  for (pair = map; pair < map_end; pair++) {
    target = pair->to;
    status = (*f)(pair->from, &target, 1, arg);
    if (status != 0) return status;

    target = pair->from;
    status = (*f)(pair->to, &target, 1, arg);
    if (status != 0) return status;
  }

  if (ess_tsett_flag == 0)
    return 0;
  return ss_apply_all_case_fold(flag, f, arg);
}
/* List the case-fold alternatives for the text starting at `p`.
 *
 * - ASCII letter: items[0] is the opposite-case letter (1 byte, 1 code).
 *   If additionally `ess_tsett_flag` is set, the letter is 's'/'S', at
 *   least one more byte is available before `end`, and that byte is
 *   's'/'S', then items[1] is 0xdf covering 2 bytes; returns 2.
 *   Otherwise returns 1.
 * - 0xdf with `ess_tsett_flag` set: four two-code alternatives
 *   ("ss", "SS", "sS", "Ss"), each covering 1 byte; returns 4.
 * - Otherwise the byte is looked up in `map` (either side of a pair);
 *   on a hit items[0] is the other side of the pair and 1 is returned.
 * - No alternative: returns 0.
 *
 * `flag` is unused.  The byte at `p` is read unconditionally.
 */
extern int
onigenc_get_case_fold_codes_by_str_with_map(int map_size,
    const OnigPairCaseFoldCodes map[],
    int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  const OnigUChar ch = *p;
  const int is_upper = (0x41 <= ch && ch <= 0x5a);
  const int is_lower = (0x61 <= ch && ch <= 0x7a);
  int i;

  if (is_upper || is_lower) {
    items[0].byte_len = 1;
    items[0].code_len = 1;
    items[0].code[0] = (OnigCodePoint )(is_upper ? ch + 0x20 : ch - 0x20);

    /* "ss" in any case combination also folds to 0xdf */
    if ((ch == 0x53 || ch == 0x73) && ess_tsett_flag != 0 && end > p + 1
        && (p[1] == 0x53 || p[1] == 0x73)) {
      items[1].byte_len = 2;
      items[1].code_len = 1;
      items[1].code[0] = (OnigCodePoint )0xdf;
      return 2;
    }
    return 1;
  }

  if (ch == 0xdf && ess_tsett_flag != 0) {
    static const OnigUChar expansions[4][2] = {
      { 's', 's' }, { 'S', 'S' }, { 's', 'S' }, { 'S', 's' }
    };

    for (i = 0; i < 4; i++) {
      items[i].byte_len = 1;
      items[i].code_len = 2;
      items[i].code[0] = (OnigCodePoint )expansions[i][0];
      items[i].code[1] = (OnigCodePoint )expansions[i][1];
    }
    return 4;
  }

  for (i = 0; i < map_size; i++) {
    if (ch == map[i].from) {
      items[0].byte_len = 1;
      items[0].code_len = 1;
      items[0].code[0] = map[i].to;
      return 1;
    }
    if (ch == map[i].to) {
      items[0].byte_len = 1;
      items[0].code_len = 1;
      items[0].code[0] = map[i].from;
      return 1;
    }
  }
  return 0;
}
/* Stub for encodings without ctype code-range support: ignores all
   arguments and always returns ONIG_NO_SUPPORT_CONFIG. */
extern int
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
    OnigCodePoint* sb_out ARG_UNUSED,
    const OnigCodePoint* ranges[] ARG_UNUSED)
{
  const int unsupported = ONIG_NO_SUPPORT_CONFIG;

  return unsupported;
}
/* Return 1 when `p` is inside [p, end) and points at byte 0x0a,
   otherwise 0. */
extern int
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
{
  return (p < end && *p == 0x0a) ? 1 : 0;
}
/* for single byte encodings */
/* Fold one single-byte character: store the ASCII lower-case form of the
   byte at *p into lower[0], advance *p by one byte, and return 1 (the
   number of bytes written).  `flag` and `end` are unused. */
extern int
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
    const UChar*end ARG_UNUSED, UChar* lower)
{
  const UChar ch = **p;

  *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(ch);
  ++*p;
  return 1;
}
/* Disabled code: the #if 0 guard excludes this function from compilation.
   As written it would advance *pp by one byte and return the
   ONIGENC_IS_ASCII_CODE_CASE_AMBIG result for the byte it skipped. */
#if 0
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp)++;
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
#endif
/* Character length for single-byte encodings: always 1, regardless of
   the byte at `p`. */
extern int
onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
{
  const int one_byte = 1;

  return one_byte;
}
/* Decode one single-byte character: the code point is the value of the
   byte at `p`.  `end` is unused. */
extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
  const OnigCodePoint code = (OnigCodePoint )(*p);

  return code;
}
/* Encoded length of `code` in a single-byte encoding.
 *
 * Returns 1 for code points below 0x100 and
 * ONIGERR_INVALID_CODE_POINT_VALUE for anything larger.
 * The parameter is read, so it carries no ARG_UNUSED marker.
 */
extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
{
  return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
}
/* Encode `code` as one byte: its low 8 bits are stored in buf[0] (higher
   bits are discarded) and 1 is returned. */
extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  buf[0] = (UChar )(code & 0xff);
  return 1;
}
/* Left-adjust for single-byte encodings: `s` is returned unchanged (cast
   to non-const).  `start` is unused. */
extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
    const UChar* s)
{
  UChar* head = (UChar* )s;

  return head;
}
/* Reverse-match predicate that ignores its arguments and always answers
   TRUE. */
extern int
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
    const UChar* end ARG_UNUSED)
{
  const int allowed = TRUE;

  return allowed;
}
/* Reverse-match predicate that ignores its arguments and always answers
   FALSE. */
extern int
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
    const UChar* end ARG_UNUSED)
{
  const int allowed = FALSE;

  return allowed;
}
/* Character-class test restricted to ASCII: code points 128 and above
   are never members (FALSE); below that the answer comes from
   ONIGENC_IS_ASCII_CODE_CTYPE. */
extern int
onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 128)
    return FALSE;
  return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
}
/* Decode one multi-byte character into a code point by concatenating its
 * bytes big-endian: code = (code << 8) + next byte.
 *
 * The character length comes from enclen(enc, p).  The first byte is
 * always read; trailing bytes are read only while they lie before `end`,
 * so a truncated character yields the value of the bytes available.
 */
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
{
  const int len = enclen(enc, p);
  OnigCodePoint code = (OnigCodePoint )(*p++);
  int remaining;

  for (remaining = len - 1; remaining > 0 && p < end; remaining--) {
    code = (code << 8) + (OnigCodePoint )(*p++);
  }
  return code;
}
/* Fold one character of a multi-byte encoding into `lower`.
 *
 * An ASCII character (per ONIGENC_IS_MBC_ASCII) is replaced by its ASCII
 * lower-case form: one byte written, *pp advanced by 1, returns 1.
 * Any other character is copied unchanged: enclen(enc, p) bytes are
 * written, *pp is advanced by that length, and the length is returned.
 * `flag` and `end` are unused.
 */
extern int
onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
    const UChar** pp, const UChar* end ARG_UNUSED,
    UChar* lower)
{
  const UChar* src = *pp;
  int len, i;

  if (ONIGENC_IS_MBC_ASCII(src)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
    (*pp)++;
    return 1;
  }

  len = enclen(enc, src);
  for (i = 0; i < len; i++)
    lower[i] = src[i];
  (*pp) += len;
  return len; /* byte length of the copied character */
}
/* Disabled code: the #if 0 guard excludes this function from compilation.
   As written, an ASCII character advances *pp by one byte and returns the
   ONIGENC_IS_ASCII_CODE_CASE_AMBIG result; any other character advances
   *pp by enclen(enc, p) and returns FALSE. */
#if 0
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
(*pp)++;
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
(*pp) += enclen(enc, p);
return FALSE;
}
#endif
/* Encoded length for encodings of at most two bytes: 2 when any of bits
   8..15 of `code` is set, otherwise 1. */
extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
{
  return ((code & 0xff00) != 0) ? 2 : 1;
}
/* Encoded length for encodings of at most four bytes: the position of
   the highest non-zero byte of `code` (4, 3, 2), or 1 when only the low
   byte is in use. */
extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code)
{
  if ((code & 0xff000000) != 0) return 4;
  if ((code & 0xff0000) != 0) return 3;
  if ((code & 0xff00) != 0) return 2;
  return 1;
}
/* Encode `code` into `buf` as one or two bytes, high byte first.
 *
 * The high byte is emitted only when bits 8..15 are non-zero.  The bytes
 * written are then validated: if enclen(enc, buf) disagrees with the
 * number of bytes produced, ONIGERR_INVALID_CODE_POINT_VALUE is returned
 * (the buffer has already been written).  Otherwise returns the byte
 * count.
 */
extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
  int used = 0;

  if ((code & 0xff00) != 0)
    buf[used++] = (UChar )((code >> 8) & 0xff);
  buf[used++] = (UChar )(code & 0xff);

  if (enclen(enc, buf) != used)
    return ONIGERR_INVALID_CODE_POINT_VALUE;
  return used;
}
/* Encode `code` into `buf` as one to four bytes, most significant first.
 *
 * Leading zero bytes are skipped; once a byte has been emitted, every
 * lower byte is emitted too (even when zero).  The result is validated:
 * if enclen(enc, buf) disagrees with the number of bytes produced,
 * ONIGERR_INVALID_CODE_POINT_VALUE is returned (the buffer has already
 * been written).  Otherwise returns the byte count.
 */
extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
  int used = 0;

  if ((code & 0xff000000) != 0)
    buf[used++] = (UChar )((code >> 24) & 0xff);
  if ((code & 0xff0000) != 0 || used != 0)
    buf[used++] = (UChar )((code >> 16) & 0xff);
  if ((code & 0xff00) != 0 || used != 0)
    buf[used++] = (UChar )((code >> 8) & 0xff);
  buf[used++] = (UChar )(code & 0xff);

  if (enclen(enc, buf) != used)
    return ONIGERR_INVALID_CODE_POINT_VALUE;
  return used;
}
/* Map a property name in [p, end) to its ONIGENC_CTYPE_* value.
 *
 * Only the names listed in the table below are recognised.  A name
 * matches when its character count (onigenc_strlen) equals the entry's
 * length and onigenc_with_ascii_strnicmp reports equality, i.e. the
 * comparison ignores ASCII case.  Returns the entry's ctype, or
 * ONIGERR_INVALID_CHAR_PROPERTY_NAME when nothing matches.
 */
extern int
onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
  static const PosixBracketEntryType known_props[] = {
    { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
    { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
    { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
    { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
    { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
    { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
    { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
    { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
    { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
    { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
    { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
    { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
    { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
    { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
    { (UChar* )NULL, -1, 0 }   /* sentinel: NULL name ends the table */
  };
  const PosixBracketEntryType *entry;
  const int name_len = onigenc_strlen(enc, p, end);

  for (entry = known_props; IS_NOT_NULL(entry->name); entry++) {
    if (name_len != entry->len)
      continue;
    if (onigenc_with_ascii_strnicmp(enc, p, end, entry->name, entry->len) == 0)
      return entry->ctype;
  }
  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
}
/* Character-class test for two-byte encodings.
 *
 * ASCII code points (< 128) are answered by ONIGENC_IS_ASCII_CODE_CTYPE.
 * Larger code points belong only to the classes accepted by
 * CTYPE_IS_WORD_GRAPH_PRINT, and only when the code encodes to more than
 * one byte (ONIGENC_CODE_TO_MBCLEN > 1); everything else is FALSE.
 */
extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
    unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
  if (!CTYPE_IS_WORD_GRAPH_PRINT(ctype))
    return FALSE;
  return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
}
/* Character-class test for four-byte encodings.
 *
 * ASCII code points (< 128) are answered by ONIGENC_IS_ASCII_CODE_CTYPE.
 * Larger code points belong only to the classes accepted by
 * CTYPE_IS_WORD_GRAPH_PRINT, and only when the code encodes to more than
 * one byte (ONIGENC_CODE_TO_MBCLEN > 1); everything else is FALSE.
 */
extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
    unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
  if (!CTYPE_IS_WORD_GRAPH_PRINT(ctype))
    return FALSE;
  return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
}
/* Compare up to `n` characters of the encoded string [p, end) with the
 * ASCII string `sascii`.
 *
 * Returns 0 when the first `n` characters are equal.  On the first
 * difference returns (ASCII byte - decoded code point).  If the encoded
 * string ends first, returns the current ASCII byte.
 */
extern int
onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
    const UChar* sascii /* ascii */, int n)
{
  int left;

  for (left = n; left > 0; left--, sascii++, p += enclen(enc, p)) {
    int code, diff;

    if (p >= end) return (int )(*sascii);
    code = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
    diff = *sascii - code;
    if (diff != 0) return diff;
  }
  return 0;
}
/* ASCII-case-insensitive variant of onigenc_with_ascii_strncmp().
 *
 * Each decoded code point in the ASCII range is lower-cased, as is the
 * corresponding byte of `sascii`, before they are compared.  Returns 0
 * when the first `n` characters match, the difference
 * (lower-cased ASCII byte - code point) at the first mismatch, or the
 * current (unfolded) ASCII byte if the encoded string ends first.
 */
extern int
onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
    const UChar* sascii /* ascii */, int n)
{
  int left;

  for (left = n; left > 0; left--, sascii++, p += enclen(enc, p)) {
    int code, diff;

    if (p >= end) return (int )(*sascii);
    code = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
    if (ONIGENC_IS_ASCII_CODE(code))
      code = ONIGENC_ASCII_CODE_TO_LOWER_CASE(code);
    diff = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - code;
    if (diff != 0) return diff;
  }
  return 0;
}
/* Property management */
static int
resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
{
size_t size;
const OnigCodePoint **list = *plist;
size = sizeof(OnigCodePoint*) * new_size;
if (IS_NULL(list)) {
list = (const OnigCodePoint** )xmalloc(size);
}
else {
list = (const OnigCodePoint** )xrealloc((void* )list, size);
}
if (IS_NULL(list)) return ONIGERR_MEMORY;
*plist = list;
*psize = new_size;
return 0;
}
/* Append a property to the list and register its name.
 *
 * Steps, in order:
 *  1. If the list is full (*psize <= *pnum) it is grown -- to
 *     PROP_INIT_SIZE entries the first time, doubled afterwards.
 *  2. `prop` is stored at index *pnum.
 *  3. The name table is created on first use.
 *  4. *pnum is incremented and `name` is inserted into the table with
 *     the value (*pnum + ONIGENC_MAX_STD_CTYPE), using the incremented
 *     count.
 *
 * Returns 0 on success, or the error from the resize / ONIGERR_MEMORY
 * when the table cannot be created (in the latter case `prop` has been
 * stored but *pnum is not incremented).  The result of the table insert
 * is not checked.  `name` must be NUL-terminated (strlen is applied).
 */
extern int
onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
    hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
    int *psize)
{
#define PROP_INIT_SIZE 16
  if (*pnum >= *psize) {
    const int grown = (*psize == 0) ? PROP_INIT_SIZE : *psize * 2;
    const int status = resize_property_list(grown, plist, psize);

    if (status != 0) return status;
  }

  (*plist)[*pnum] = prop;

  if (ONIG_IS_NULL(*table)) {
    *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
    if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
  }

  ++*pnum;
  onig_st_insert_strend(*table, name, name + strlen((char* )name),
      (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
  return 0;
}
/*
 * Call the initializer `f` between THREAD_ATOMIC_START and
 * THREAD_ATOMIC_END and return whatever `f` returns.
 * NOTE(review): presumably the two macros serialize the call against other
 * threads -- their definitions are not visible here; confirm.
 */
extern int
onigenc_property_list_init(int (*f)(void))
{
int r;
THREAD_ATOMIC_START;
r = f();
THREAD_ATOMIC_END;
return r;
}

197
src/Onigmo/regenc.h Normal file
View File

@ -0,0 +1,197 @@
#ifndef REGENC_H
#define REGENC_H
/**********************************************************************
regenc.h - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef PACKAGE
/* PACKAGE is defined in config.h */
#include "config.h"
#endif
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#include "oniguruma.h"
/* A (from, to) pair of code points.  Arrays of these pairs are passed as the
   `map` argument of the *_with_map case-fold functions declared in this
   header. */
typedef struct {
OnigCodePoint from;
OnigCodePoint to;
} OnigPairCaseFoldCodes;
#ifndef NULL
#define NULL ((void* )0)
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef ARG_UNUSED
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
#else
# define ARG_UNUSED
#endif
#endif
/* Null-pointer tests; the argument is compared after a cast to void*. */
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
/* Early-return helpers.  Each expands to a bare `if (...) return ...`
   statement without a trailing semicolon, so they must not be followed by
   an `else` that belongs to an outer `if`. */
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
/* Shorthand for ONIGENC_MBC_ENC_LEN(enc,p). */
#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
/* character types bit flag:
   each BIT_CTYPE_xxx is the single-bit mask (1 << ONIGENC_CTYPE_xxx) */
#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA)
#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK)
#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL)
#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT)
#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH)
#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER)
#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT)
#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT)
#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE)
#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER)
#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT)
#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD)
#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM)
#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII)
/* Mask for an arbitrary ctype number (same shift as the constants above). */
#define CTYPE_TO_BIT(ctype) (1<<(ctype))
/* True when ctype is one of WORD, GRAPH or PRINT. */
#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
(ctype) == ONIGENC_CTYPE_PRINT)
/* One entry of a name -> ctype lookup table.  The property-name lookup in
   regenc.c compares `len` characters of `name` case-insensitively
   (onigenc_with_ascii_strnicmp) and returns `ctype` on a match. */
typedef struct {
const UChar *name;
int ctype;
short int len;
} PosixBracketEntryType;
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
/* methods for single byte encoding */
ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
/* methods for multi byte encoding */
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
/* in enc/unicode.c */
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
/* Byte tests on the top six bits of c: 0xd8..0xdb for FIRST, 0xdc..0xdf for
   SECOND.  These are the most significant bytes of the UTF-16 code units
   U+D800..U+DBFF and U+DC00..U+DFFF respectively. */
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
OnigEncISO_8859_1_ToLowerCaseTable[c]
#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
OnigEncISO_8859_1_ToUpperCaseTable[c]
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
ONIG_EXTERN int
onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
ONIG_EXTERN int
onigenc_with_ascii_strnicmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
ONIG_EXTERN UChar*
onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
/* defined in regexec.c, but used in enc/xxx.c */
extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
/* True when code is below 0x80.  Note that a negative signed argument also
   satisfies this test. */
#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
/* Direct table lookups: the argument is used as an array index without any
   range check. */
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
/* Tests the CTYPE_TO_BIT(ctype) bit of the ASCII ctype table entry for code
   (code is likewise used as an unchecked index). */
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/* True when code is classified as UPPER or LOWER in the ASCII ctype table. */
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
(ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
/* Check if the code is in the range. (from <= code && code <= to) */
/* Done with a single comparison of (code - from) against (to - from) after
   casting both to OnigCodePoint; this assumes OnigCodePoint is an unsigned
   type so that code < from wraps to a large value -- TODO confirm. */
#define ONIGENC_IS_IN_RANGE(code, from, to) \
((OnigCodePoint )((code) - (from)) <= (OnigCodePoint )((to) - (from)))
#endif /* REGENC_H */

394
src/Onigmo/regerror.c Normal file
View File

@ -0,0 +1,394 @@
/**********************************************************************
regerror.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#include <stdio.h> /* for vsnprintf() */
#ifdef HAVE_STDARG_PROTOTYPES
#include <stdarg.h>
#define va_init_list(a,b) va_start(a,b)
#else
#include <varargs.h>
#define va_init_list(a,b) va_start(a)
#endif
/*
 * Map an error code to its message format string.
 *
 * Returns a null pointer for any code >= 0.  For negative codes a pointer to
 * a constant string is returned; unknown codes yield "undefined error code".
 * Some formats contain the placeholder "%n", which onig_error_code_to_str()
 * replaces with the offending name taken from the OnigErrorInfo.
 * The returned pointer refers to a string literal and must not be modified
 * or freed.
 */
extern UChar*
onig_error_code_to_format(OnigPosition code)
{
const char *p;
if (code >= 0) return (UChar* )0;
switch (code) {
case ONIG_MISMATCH:
p = "mismatch"; break;
case ONIG_NO_SUPPORT_CONFIG:
p = "no support in this configuration"; break;
case ONIGERR_MEMORY:
p = "failed to allocate memory"; break;
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
case ONIGERR_TYPE_BUG:
p = "undefined type (bug)"; break;
case ONIGERR_PARSER_BUG:
p = "internal parser error (bug)"; break;
case ONIGERR_STACK_BUG:
p = "stack error (bug)"; break;
case ONIGERR_UNDEFINED_BYTECODE:
p = "undefined bytecode (bug)"; break;
case ONIGERR_UNEXPECTED_BYTECODE:
p = "unexpected bytecode (bug)"; break;
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
p = "default multibyte-encoding is not set"; break;
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
p = "can't convert to wide-char on specified multibyte-encoding"; break;
case ONIGERR_INVALID_ARGUMENT:
p = "invalid argument"; break;
case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
p = "end pattern at left brace"; break;
case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
p = "end pattern at left bracket"; break;
case ONIGERR_EMPTY_CHAR_CLASS:
p = "empty char-class"; break;
case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
p = "premature end of char-class"; break;
case ONIGERR_END_PATTERN_AT_ESCAPE:
p = "end pattern at escape"; break;
case ONIGERR_END_PATTERN_AT_META:
p = "end pattern at meta"; break;
case ONIGERR_END_PATTERN_AT_CONTROL:
p = "end pattern at control"; break;
case ONIGERR_META_CODE_SYNTAX:
p = "invalid meta-code syntax"; break;
case ONIGERR_CONTROL_CODE_SYNTAX:
p = "invalid control-code syntax"; break;
case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
p = "char-class value at end of range"; break;
case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
p = "char-class value at start of range"; break;
case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
p = "unmatched range specifier in char-class"; break;
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
p = "target of repeat operator is not specified"; break;
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
p = "target of repeat operator is invalid"; break;
case ONIGERR_NESTED_REPEAT_OPERATOR:
p = "nested repeat operator"; break;
case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
p = "unmatched close parenthesis"; break;
case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
p = "end pattern with unmatched parenthesis"; break;
case ONIGERR_END_PATTERN_IN_GROUP:
p = "end pattern in group"; break;
case ONIGERR_UNDEFINED_GROUP_OPTION:
p = "undefined group option"; break;
case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
p = "invalid POSIX bracket type"; break;
case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
p = "invalid pattern in look-behind"; break;
case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
p = "invalid repeat range {lower,upper}"; break;
case ONIGERR_INVALID_CONDITION_PATTERN:
p = "invalid conditional pattern"; break;
case ONIGERR_TOO_BIG_NUMBER:
p = "too big number"; break;
case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
p = "too big number for repeat range"; break;
case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
p = "upper is smaller than lower in repeat range"; break;
case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
p = "empty range in char class"; break;
case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
p = "mismatch multibyte code length in char-class range"; break;
case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
p = "too many multibyte code ranges are specified"; break;
case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
p = "too short multibyte code string"; break;
case ONIGERR_TOO_BIG_BACKREF_NUMBER:
p = "too big backref number"; break;
case ONIGERR_INVALID_BACKREF:
#ifdef USE_NAMED_GROUP
p = "invalid backref number/name"; break;
#else
p = "invalid backref number"; break;
#endif
case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
p = "numbered backref/call is not allowed. (use name)"; break;
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
p = "too big wide-char value"; break;
case ONIGERR_TOO_SHORT_DIGITS:
p = "too short digits"; break;
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
p = "too long wide-char value"; break;
case ONIGERR_INVALID_CODE_POINT_VALUE:
p = "invalid code point value"; break;
case ONIGERR_EMPTY_GROUP_NAME:
p = "group name is empty"; break;
case ONIGERR_INVALID_GROUP_NAME:
p = "invalid group name <%n>"; break;
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
#ifdef USE_NAMED_GROUP
p = "invalid char in group name <%n>"; break;
#else
p = "invalid char in group number <%n>"; break;
#endif
case ONIGERR_UNDEFINED_NAME_REFERENCE:
p = "undefined name <%n> reference"; break;
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
p = "undefined group <%n> reference"; break;
case ONIGERR_MULTIPLEX_DEFINED_NAME:
p = "multiplex defined name <%n>"; break;
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
p = "multiplex definition name <%n> call"; break;
case ONIGERR_NEVER_ENDING_RECURSION:
p = "never ending recursion"; break;
case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
p = "group number is too big for capture history"; break;
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
p = "invalid character property name {%n}"; break;
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
p = "not supported encoding combination"; break;
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
p = "invalid combination of options"; break;
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
p = "over thread pass limit count"; break;
default:
p = "undefined error code"; break;
}
return (UChar* )p;
}
/* Format the low 8 bits of v as exactly two lower-case hex digits into s,
   followed by a terminating NUL (three bytes are written in total). */
static void sprint_byte(char* s, unsigned int v)
{
  const unsigned int low_byte = v & 0xffu;

  sprintf(s, "%02x", low_byte);
}
/* Format the low 8 bits of v as the four characters "\xHH" (lower-case hex)
   into s, followed by a terminating NUL (five bytes are written in total). */
static void sprint_byte_with_x(char* s, unsigned int v)
{
  const unsigned int low_byte = v & 0xffu;

  sprintf(s, "\\x%02x", low_byte);
}
/*
 * Render the encoded string [s, end) into buf for use in an error message.
 *
 * For encodings whose minimum character length exceeds one byte, each
 * character is decoded: code points below 0x80 are stored as a single byte,
 * others as "\xHHHH" (6 bytes) or, above 0xffff, "\xHHHHHHHH" (10 bytes).
 * For all other encodings the raw bytes are copied.
 *
 * At most buf_size bytes are written and the result is NOT NUL-terminated.
 * An escape that does not fit completely is not emitted at all.
 *
 * Returns the number of bytes stored in buf; *is_over is set to 1 when part
 * of the input was left out, 0 otherwise.
 */
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
                    UChar buf[], int buf_size, int *is_over)
{
  int len;
  UChar *p;
  OnigCodePoint code;

  if (ONIGENC_MBC_MINLEN(enc) > 1) {
    p = s;
    len = 0;
    while (p < end && len < buf_size) {
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
      if (code >= 0x80) {
        /* Format into a scratch buffer first: the helpers append a NUL,
           which must not land beyond buf_size in the caller's buffer. */
        char hex[12];
        int width = (code > 0xffff) ? 10 : 6;

        /* Never fall back to the short form for a wide code point; that
           would silently drop its upper 16 bits. */
        if (len + width > buf_size) break;

        if (width == 10) {
          sprint_byte_with_x(hex, (unsigned int )(code >> 24));
          sprint_byte(hex + 4, (unsigned int )(code >> 16));
          sprint_byte(hex + 6, (unsigned int )(code >> 8));
          sprint_byte(hex + 8, (unsigned int )code);
        }
        else {
          sprint_byte_with_x(hex, (unsigned int )(code >> 8));
          sprint_byte(hex + 4, (unsigned int )code);
        }
        xmemcpy(&(buf[len]), hex, (size_t )width);
        len += width;
      }
      else {
        buf[len++] = (UChar )code;
      }

      p += enclen(enc, p);
    }

    *is_over = ((p < end) ? 1 : 0);
  }
  else {
    len = (int )MIN((end - s), buf_size);
    xmemcpy(buf, s, (size_t )len);
    *is_over = ((buf_size < (end - s)) ? 1 : 0);
  }

  return len;
}
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
/*
 * Write the message for error `code` into s as a NUL-terminated string and
 * return its length in bytes (excluding the terminator).
 *
 * For the error codes whose format contains "%n", the variadic argument
 * must be an OnigErrorInfo*; the name it describes is converted with
 * to_ascii(), cut to fit MAX_ERROR_PAR_LEN (followed by "..." when
 * shortened) and substituted for "%n".
 *
 * A code for which onig_error_code_to_format() has no message (code >= 0)
 * produces an empty string and a return value of 0.
 *
 * The caller provides the buffer s; no size is passed, so it has to be
 * large enough for the longest message (see ONIG_MAX_ERROR_MESSAGE_LEN).
 */
extern int
#ifdef HAVE_STDARG_PROTOTYPES
onig_error_code_to_str(UChar* s, OnigPosition code, ...)
#else
onig_error_code_to_str(s, code, va_alist)
  UChar* s;
  OnigPosition code;
  va_dcl
#endif
{
  UChar *p, *q;
  OnigErrorInfo* einfo;
  size_t len;
  int is_over;
  UChar parbuf[MAX_ERROR_PAR_LEN];
  va_list vargs;

  va_init_list(vargs, code);

  switch (code) {
  case ONIGERR_UNDEFINED_NAME_REFERENCE:
  case ONIGERR_UNDEFINED_GROUP_REFERENCE:
  case ONIGERR_MULTIPLEX_DEFINED_NAME:
  case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
  case ONIGERR_INVALID_GROUP_NAME:
  case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
  case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
    einfo = va_arg(vargs, OnigErrorInfo*);
    /* Leave room in parbuf's budget for the "..." truncation marker. */
    len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
                   parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
    q = onig_error_code_to_format(code);
    p = s;
    while (*q != '\0') {
      if (*q == '%') {
        q++;
        /* A stray '%' at the very end: stop instead of stepping past the
           terminator. */
        if (*q == '\0') break;
        if (*q == 'n') { /* '%n': name */
          xmemcpy(p, parbuf, len);
          p += len;
          if (is_over != 0) {
            xmemcpy(p, "...", 3);
            p += 3;
          }
          q++;
          continue;
        }
        /* Any other character after '%' is copied literally below. */
      }
      *p++ = *q++;
    }
    *p = '\0';
    len = p - s;
    break;

  default:
    q = onig_error_code_to_format(code);
    if (q == (UChar* )0) {
      /* Non-negative codes have no format string; do not dereference it. */
      s[0] = '\0';
      len = 0;
      break;
    }
    len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
    xmemcpy(s, q, len);
    s[len] = '\0';
    break;
  }

  va_end(vargs);
  return (int )len;
}
/*
 * Format a message into buf (printf-style, via xvsnprintf) and, when the
 * buffer is large enough, append ": /<pattern>/".
 *
 * The pattern [pat, pat_end) is made printable while it is appended:
 *   - a backslash is copied together with the character that follows it;
 *   - '/' is written as "\/";
 *   - multibyte characters are copied as-is when the encoding's minimum
 *     length is 1, otherwise each of their bytes is written as "\xHH";
 *   - bytes that are neither printable nor space are written as "\xHH".
 *
 * The pattern is appended only if the formatted message plus a worst case
 * of four output bytes per pattern byte fits into bufsize; otherwise buf
 * holds just the formatted message.  Nothing is appended when the
 * formatting call itself reports failure.
 */
void
#ifdef HAVE_STDARG_PROTOTYPES
onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
                           UChar* pat, UChar* pat_end, const UChar *fmt, ...)
#else
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
  UChar buf[];
  int bufsize;
  OnigEncoding enc;
  UChar* pat;
  UChar* pat_end;
  const UChar *fmt;
  va_dcl
#endif
{
  size_t need;
  int n, len;
  UChar *p, *s, *bp;
  UChar bs[6];
  va_list args;

  va_init_list(args, fmt);
  n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
  va_end(args);

  need = (pat_end - pat) * 4 + 4;

  /* n < 0 means the formatter failed; without this test the negative value
     would wrap around in the size_t arithmetic below. */
  if (n >= 0 && n + need < (size_t )bufsize) {
    strcat((char* )buf, ": /");
    s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);

    p = pat;
    while (p < pat_end) {
      if (*p == '\\') {
        *s++ = *p++;
        /* The backslash may be the last byte of the pattern. */
        if (p < pat_end) {
          len = enclen(enc, p);
          if (len > pat_end - p) len = (int )(pat_end - p);
          while (len-- > 0) *s++ = *p++;
        }
      }
      else if (*p == '/') {
        *s++ = (unsigned char )'\\';
        *s++ = *p++;
      }
      else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
        len = enclen(enc, p);
        /* Do not read past pat_end on a truncated multibyte character. */
        if (len > pat_end - p) len = (int )(pat_end - p);
        if (ONIGENC_MBC_MINLEN(enc) == 1) {
          while (len-- > 0) *s++ = *p++;
        }
        else { /* for UTF16 */
          int blen;

          while (len-- > 0) {
            sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
            blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
            bp = bs;
            while (blen-- > 0) *s++ = *bp++;
          }
        }
      }
      else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
               !ONIGENC_IS_CODE_SPACE(enc, *p)) {
        sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
        len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
        bp = bs;
        while (len-- > 0) *s++ = *bp++;
      }
      else {
        *s++ = *p++;
      }
    }

    *s++ = '/';
    *s   = '\0';
  }
}

4335
src/Onigmo/regexec.c Normal file

File diff suppressed because it is too large Load Diff

223
src/Onigmo/regext.c Normal file
View File

@ -0,0 +1,223 @@
/**********************************************************************
regext.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/* Widen each byte of [s, end) to a 4-byte big-endian unit: three zero bytes
   followed by the source byte.  conv must have room for 4 * (end - s)
   bytes. */
static void
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
{
  for (; s < end; s++, conv += 4) {
    conv[0] = '\0';
    conv[1] = '\0';
    conv[2] = '\0';
    conv[3] = *s;
  }
}
/* Widen each byte of [s, end) to a 4-byte little-endian unit: the source
   byte followed by three zero bytes.  conv must have room for
   4 * (end - s) bytes. */
static void
conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
{
  for (; s < end; s++, conv += 4) {
    conv[0] = *s;
    conv[1] = '\0';
    conv[2] = '\0';
    conv[3] = '\0';
  }
}
/* Widen each byte of [s, end) to a 2-byte big-endian unit: a zero byte
   followed by the source byte.  conv must have room for 2 * (end - s)
   bytes. */
static void
conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
{
  for (; s < end; s++, conv += 2) {
    conv[0] = '\0';
    conv[1] = *s;
  }
}
/* Widen each byte of [s, end) to a 2-byte little-endian unit: the source
   byte followed by a zero byte.  conv must have room for 2 * (end - s)
   bytes. */
static void
conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
{
  for (; s < end; s++, conv += 2) {
    conv[0] = *s;
    conv[1] = '\0';
  }
}
/* Copy [s, end) to conv, reversing the byte order of every 4-byte unit.
   Only complete units are converted: a trailing remainder of 1-3 bytes is
   left out, so nothing is read beyond `end` or written beyond the
   (end - s) bytes the caller allocated for conv. */
static void
conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
{
  while (end - s >= 4) {
    *conv++ = s[3];
    *conv++ = s[2];
    *conv++ = s[1];
    *conv++ = s[0];
    s += 4;
  }
}
/* Copy [s, end) to conv, swapping the two bytes of every 2-byte unit.
   Only complete units are converted: a trailing odd byte is left out, so
   nothing is read beyond `end` or written beyond the (end - s) bytes the
   caller allocated for conv. */
static void
conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
{
  while (end - s >= 2) {
    *conv++ = s[1];
    *conv++ = s[0];
    s += 2;
  }
}
/*
 * Convert the string [s, end) from encoding `from` to encoding `to` into a
 * newly allocated buffer.
 *
 * Supported conversions:
 *   ASCII / ISO-8859-1  ->  UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE
 *   UTF-16LE <-> UTF-16BE, UTF-32LE <-> UTF-32BE (byte swap)
 *
 * On success returns 0 and stores the buffer in *conv and its end in
 * *conv_end; the caller owns the buffer and releases it with xfree().
 * Returns ONIGERR_MEMORY when allocation fails, ONIGERR_INVALID_ARGUMENT
 * when the input length is negative or not a whole number of code units for
 * a byte-swap conversion, and ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION for
 * any other pair of encodings.
 */
static int
conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
              UChar** conv, UChar** conv_end)
{
  ptrdiff_t len = end - s;

  if (len < 0) return ONIGERR_INVALID_ARGUMENT;

  if (to == ONIG_ENCODING_UTF16_BE) {
    if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
      *conv = (UChar* )xmalloc(len * 2);
      CHECK_NULL_RETURN_MEMERR(*conv);
      *conv_end = *conv + (len * 2);
      conv_ext0be(s, end, *conv);
      return 0;
    }
    else if (from == ONIG_ENCODING_UTF16_LE) {
    swap16:
      /* The output buffer is exactly len bytes; an odd length would make
         the swap run one byte past both buffers. */
      if (len % 2 != 0) return ONIGERR_INVALID_ARGUMENT;
      *conv = (UChar* )xmalloc(len);
      CHECK_NULL_RETURN_MEMERR(*conv);
      *conv_end = *conv + len;
      conv_swap2bytes(s, end, *conv);
      return 0;
    }
  }
  else if (to == ONIG_ENCODING_UTF16_LE) {
    if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
      *conv = (UChar* )xmalloc(len * 2);
      CHECK_NULL_RETURN_MEMERR(*conv);
      *conv_end = *conv + (len * 2);
      conv_ext0le(s, end, *conv);
      return 0;
    }
    else if (from == ONIG_ENCODING_UTF16_BE) {
      goto swap16;
    }
  }

  if (to == ONIG_ENCODING_UTF32_BE) {
    if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
      *conv = (UChar* )xmalloc(len * 4);
      CHECK_NULL_RETURN_MEMERR(*conv);
      *conv_end = *conv + (len * 4);
      conv_ext0be32(s, end, *conv);
      return 0;
    }
    else if (from == ONIG_ENCODING_UTF32_LE) {
    swap32:
      /* Same reasoning as for swap16, with 4-byte units. */
      if (len % 4 != 0) return ONIGERR_INVALID_ARGUMENT;
      *conv = (UChar* )xmalloc(len);
      CHECK_NULL_RETURN_MEMERR(*conv);
      *conv_end = *conv + len;
      conv_swap4bytes(s, end, *conv);
      return 0;
    }
  }
  else if (to == ONIG_ENCODING_UTF32_LE) {
    if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
      *conv = (UChar* )xmalloc(len * 4);
      CHECK_NULL_RETURN_MEMERR(*conv);
      *conv_end = *conv + (len * 4);
      conv_ext0le32(s, end, *conv);
      return 0;
    }
    else if (from == ONIG_ENCODING_UTF32_BE) {
      goto swap32;
    }
  }

  return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
/*
 * Create and compile a regex whose pattern may be written in a different
 * encoding (ci->pattern_enc) than the one it will be matched against
 * (ci->target_enc).
 *
 * When the two encodings differ, the pattern is first converted with
 * conv_encoding(); the temporary copy is released before returning.
 *
 * Returns 0 on success with the new regex in *reg.  On a conversion error
 * the error is returned and *reg is not touched; on any later failure the
 * regex is released, *reg is set to NULL and the error code is returned.
 *
 * NOTE(review): presumably onig_compile() may store pointers into the
 * pattern in *einfo; if the pattern was converted those would refer to the
 * buffer freed here -- confirm before relying on einfo->par afterwards.
 */
extern int
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
                OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
  int r;
  UChar *cpat, *cpat_end;

  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;

  if (ci->pattern_enc == ci->target_enc) {
    cpat     = (UChar* )pattern;
    cpat_end = (UChar* )pattern_end;
  }
  else {
    r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
                      &cpat, &cpat_end);
    if (r) return r;
  }

  *reg = (regex_t* )xmalloc(sizeof(regex_t));
  if (IS_NULL(*reg)) {
    r = ONIGERR_MEMORY;
  }
  else {
    r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
                      ci->syntax);
    if (r == 0)
      r = onig_compile(*reg, cpat, cpat_end, einfo);

    if (r != 0) {
      onig_free(*reg);
      *reg = NULL;
    }
  }

  /* Release the converted copy, if one was made. */
  if (cpat != pattern) xfree(cpat);
  return r;
}
#ifdef USE_RECOMPILE_API
/*
 * Compile a new regex with onig_new_deluxe() and install it in place of
 * `reg`.  When `reg` is in the ONIG_STATE_NORMAL state the new regex is
 * transferred into it directly; otherwise it is appended with
 * onig_chain_link_add().
 *
 * Returns 0 on success, or the error from onig_new_deluxe(), in which case
 * `reg` is left as it was.
 */
extern int
onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
                      OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
  regex_t *new_reg;
  int r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo);

  if (r != 0)
    return r;

  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL)
    onig_chain_link_add(reg, new_reg);
  else
    onig_transfer(reg, new_reg);

  return 0;
}
#endif

168
src/Onigmo/reggnu.c Normal file
View File

@ -0,0 +1,168 @@
/**********************************************************************
reggnu.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#ifndef ONIGGNU_H
#include "oniggnu.h"
#endif
/* GNU-regex compatible wrapper: release the contents of the region `r` via
   onig_region_free(), without freeing the OnigRegion object itself. */
extern void
re_free_registers(OnigRegion* r)
{
/* 0: don't free self */
onig_region_free(r, 0);
}
/*
 * Move `startpos` onto a character boundary of `string`.
 *
 * Nothing is adjusted when startpos is not inside (0, size) or when the
 * regex's encoding has a maximum character length of 1.  Otherwise the
 * position is moved to the head of a character: to the right when range is
 * positive, to the left otherwise.
 *
 * Returns the (possibly adjusted) byte offset.
 */
extern int
re_adjust_startpos(regex_t* reg, const char* string, int size,
                   int startpos, int range)
{
  UChar *head;
  UChar *at;

  if (startpos <= 0 || ONIGENC_MBC_MAXLEN(reg->enc) == 1 || startpos >= size)
    return startpos;

  at = (UChar* )string + startpos;
  if (range > 0)
    head = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, at);
  else
    head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, at);

  return (int )(head - (UChar* )string);
}
/* GNU-regex compatible wrapper around onig_match(): match `reg` against the
   `size` bytes at `str`, anchored at byte offset `pos`, with no extra
   options.  The onig_match() result is returned converted to int. */
extern int
re_match(regex_t* reg, const char* str, int size, int pos,
         struct re_registers* regs)
{
  UChar* text     = (UChar* )str;
  UChar* text_end = (UChar* )(str + size);
  UChar* at       = (UChar* )(str + pos);

  return (int )onig_match(reg, text, text_end, at, regs, ONIG_OPTION_NONE);
}
/* GNU-regex compatible wrapper around onig_search(): search the `size`
   bytes at `string`, starting at byte offset `startpos` and covering
   `range` bytes from there (the range may be negative), with no extra
   options.  The onig_search() result is returned converted to int. */
extern int
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
          struct re_registers* regs)
{
  UChar* text       = (UChar* )string;
  UChar* text_end   = (UChar* )(string + size);
  UChar* from       = (UChar* )(string + startpos);
  UChar* search_end = (UChar* )(string + startpos + range);

  return (int )onig_search(bufp, text, text_end, from, search_end,
                           regs, ONIG_OPTION_NONE);
}
/* GNU-regex compatible wrapper around onig_compile(): compile the `size`
   bytes at `pattern` into the already initialised `reg`.  On failure the
   error message is written to `ebuf` when ebuf is not NULL.
   Returns the onig_compile() result (ONIG_NORMAL on success). */
extern int
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
  OnigErrorInfo einfo;
  int r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size),
                       &einfo);

  if (r != ONIG_NORMAL && IS_NOT_NULL(ebuf))
    (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);

  return r;
}
#ifdef USE_RECOMPILE_API
/* GNU-regex compatible wrapper around onig_recompile(): recompile `reg`
   from the `size` bytes at `pattern`, keeping reg's current options and
   using the default encoding and default syntax.  On failure the error
   message is written to `ebuf` when ebuf is not NULL.
   Returns the onig_recompile() result (ONIG_NORMAL on success). */
extern int
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
  int r;
  OnigErrorInfo einfo;
  /* Encoding and options would be better passed in as arguments, but this
     signature is kept as it is to match the existing re.c. (2002/11/29) */
  OnigEncoding enc = OnigEncDefaultCharEncoding;

  r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
                     reg->options, enc, OnigDefaultSyntax, &einfo);

  if (r != ONIG_NORMAL && IS_NOT_NULL(ebuf))
    (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);

  return r;
}
#endif
/* GNU-regex compatible wrapper: release `reg` by passing it to
   onig_free(). */
extern void
re_free_pattern(regex_t* reg)
{
onig_free(reg);
}
/*
 * Allocate a regex object and initialise it with the default options,
 * case-fold flag, encoding and syntax.
 *
 * Returns 0 on success with the new object in *reg.  On failure an error
 * code is returned (ONIGERR_MEMORY, or the onig_reg_init() error) and *reg
 * is NULL, so the caller never receives a half-initialised object and
 * nothing is leaked.
 */
extern int
re_alloc_pattern(regex_t** reg)
{
  int r;

  *reg = (regex_t* )xmalloc(sizeof(regex_t));
  if (IS_NULL(*reg)) return ONIGERR_MEMORY;

  r = onig_reg_init(*reg, ONIG_OPTION_DEFAULT,
                    ONIGENC_CASE_FOLD_DEFAULT,
                    OnigEncDefaultCharEncoding,
                    OnigDefaultSyntax);
  if (r != 0) {
    /* Release only what was allocated above; the object's fields may not
       have been set up, so it is not handed to onig_free(). */
    xfree(*reg);
    *reg = NULL;
  }
  return r;
}
/* GNU-regex compatible wrapper: pass `table` to
   onigenc_set_default_caseconv_table(). */
extern void
re_set_casetable(const char* table)
{
onigenc_set_default_caseconv_table((UChar* )table);
}
/* Select the default encoding from a GNU-regex style RE_MBCTYPE_* code.
   ASCII, EUC (EUC-JP), SJIS and UTF8 are recognised; any other value leaves
   the default encoding unchanged. */
extern void
re_mbcinit(int mb_code)
{
  switch (mb_code) {
  case RE_MBCTYPE_ASCII:
    onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
    return;
  case RE_MBCTYPE_EUC:
    onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
    return;
  case RE_MBCTYPE_SJIS:
    onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
    return;
  case RE_MBCTYPE_UTF8:
    onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
    return;
  default:
    return;
  }
}

868
src/Onigmo/regint.h Normal file
View File

@ -0,0 +1,868 @@
#ifndef REGINT_H
#define REGINT_H
/**********************************************************************
regint.h - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* for debug */
/* Debug switches; all of them are commented out (disabled) here.
   Defining any of ONIG_DEBUG_PARSE_TREE, ONIG_DEBUG_COMPILE,
   ONIG_DEBUG_SEARCH, ONIG_DEBUG_MATCH or ONIG_DEBUG_STATISTICS also
   defines the umbrella macro ONIG_DEBUG below; ONIG_DONT_OPTIMIZE does
   not. */
/* #define ONIG_DEBUG_PARSE_TREE */
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
/* #define ONIG_DEBUG_STATISTICS */
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#endif
#endif
/* Set on x86, x86-64 and m68020 targets.  When defined, PLATFORM_GET_INC
   reads operands with a direct pointer dereference instead of xmemcpy(),
   and the bitset element type Bits is unsigned int instead of
   unsigned char (both further down in this header). */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
defined(__mc68020__)
#define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
/* config */
/* spec. config */
/* Compile-time feature switches.  A switch is enabled by being defined;
   a commented-out #define means the feature is disabled in this build. */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_PERL_SUBEXP_CALL
#define USE_CAPITAL_P_NAMED_GROUP
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* Disabled: everything guarded by #ifdef USE_RECOMPILE_API is compiled
   out, and the ONIG_STATE_* macros below expand to nothing. */
/* #define USE_RECOMPILE_API */
/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
#define USE_NO_INVALID_QUANTIFIER
/* internal config */
#define USE_PARSE_TREE_NODE_RECYCLE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QTFR_PEEK_NEXT
#define USE_ST_LIBRARY
#define USE_SHARED_CCLASS_TABLE
#define USE_SUNDAY_QUICK_SEARCH
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
/* check config */
/* Fail the build early when USE_PERL_SUBEXP_CALL or
   USE_CAPITAL_P_NAMED_GROUP is enabled without both USE_NAMED_GROUP and
   USE_SUBEXP_CALL. */
#if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP)
#if !defined(USE_NAMED_GROUP) || !defined(USE_SUBEXP_CALL)
#error USE_NAMED_GROUP and USE_SUBEXP_CALL must be defined.
#endif
#endif
/* ARG_UNUSED: with GCC-compatible compilers expands to the `unused`
   attribute; expands to nothing elsewhere. */
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
#else
# define ARG_UNUSED
#endif
/* */
/* escape other system UChar definition */
/* config.h is pulled in here; if it defined ONIG_ESCAPE_UCHAR_COLLISION
   the definition is removed again before the library headers are read. */
#include "config.h"
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#define USE_WORD_BEGIN_END /* "\<", "\>" */
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
/* #define USE_MULTI_THREAD_SYSTEM */
/* Thread hooks.  USE_MULTI_THREAD_SYSTEM is disabled above and every
   hook below expands to nothing in this build. */
#define THREAD_SYSTEM_INIT /* depend on thread system */
#define THREAD_SYSTEM_END /* depend on thread system */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
/* Allocator indirection: the x* names used throughout the library map
   directly onto the C library allocator here. */
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
#define xfree free
/* Expands to nothing in this build. */
#define CHECK_INTERRUPT_IN_MATCH_AT
/* Rename the generic st_* hash-table identifiers used in the sources to
   their onig_-prefixed counterparts. */
#define st_init_table onig_st_init_table
#define st_init_table_with_size onig_st_init_table_with_size
#define st_init_numtable onig_st_init_numtable
#define st_init_numtable_with_size onig_st_init_numtable_with_size
#define st_init_strtable onig_st_init_strtable
#define st_init_strtable_with_size onig_st_init_strtable_with_size
#define st_delete onig_st_delete
#define st_delete_safe onig_st_delete_safe
#define st_insert onig_st_insert
#define st_lookup onig_st_lookup
#define st_foreach onig_st_foreach
#define st_add_direct onig_st_add_direct
#define st_free_table onig_st_free_table
#define st_cleanup_safe onig_st_cleanup_safe
#define st_copy onig_st_copy
#define st_nothing_key_clone onig_st_nothing_key_clone
#define st_nothing_key_free onig_st_nothing_key_free
/* */
/* NOTE(review): this alias runs in the opposite direction (onig_ name ->
   plain st_ name), unlike every alias above -- confirm it is intended. */
#define onig_st_is_member st_is_member
/* Tunables.  NOTE(review): by their names these belong to the
   combination-explosion state check and to THREAD_PASS handling, both of
   which are disabled in this header -- confirm where they are consumed. */
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
#define THREAD_PASS_LIMIT_COUNT 8
/* Memory primitive indirection: x* names map onto the C library. */
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
/* Windows toolchains other than GCC use the underscore-prefixed names
   for alloca and vsnprintf. */
#if defined(_WIN32) && !defined(__GNUC__)
#define xalloca _alloca
#define xvsnprintf _vsnprintf
#else
#define xalloca alloca
#define xvsnprintf vsnprintf
#endif
/* Per-regex `state` counter helpers.  They do real work only when both
   USE_RECOMPILE_API and USE_MULTI_THREAD_SYSTEM are defined; otherwise
   (as in this build, where both are disabled) all four expand to
   nothing.  The _THREAD variants bracket the update with
   THREAD_ATOMIC_START / THREAD_ATOMIC_END. */
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
#define ONIG_STATE_INC(reg) (reg)->state++
#define ONIG_STATE_DEC(reg) (reg)->state--
#define ONIG_STATE_INC_THREAD(reg) do {\
THREAD_ATOMIC_START;\
(reg)->state++;\
THREAD_ATOMIC_END;\
} while(0)
#define ONIG_STATE_DEC_THREAD(reg) do {\
THREAD_ATOMIC_START;\
(reg)->state--;\
THREAD_ATOMIC_END;\
} while(0)
#else
#define ONIG_STATE_INC(reg) /* Nothing */
#define ONIG_STATE_DEC(reg) /* Nothing */
#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__))
#include <alloca.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <ctype.h>
#ifdef HAVE_SYS_TYPES_H
#ifndef __BORLANDC__
#include <sys/types.h>
#endif
#endif
#ifdef HAVE_STDINT_H
# include <stdint.h>
#endif
#ifdef STDC_HEADERS
# include <stddef.h>
#endif
#ifdef __BORLANDC__
#include <malloc.h>
#endif
#ifdef ONIG_DEBUG
# include <stdio.h>
#endif
#ifdef _WIN32
#if defined(_MSC_VER) && (_MSC_VER < 1300)
#ifndef _INTPTR_T_DEFINED
#define _INTPTR_T_DEFINED
typedef int intptr_t;
#endif
#ifndef _UINTPTR_T_DEFINED
#define _UINTPTR_T_DEFINED
typedef unsigned int uintptr_t;
#endif
#endif
#endif /* _WIN32 */
#include "regenc.h"
/* Discard any MIN/MAX inherited from system headers and define local
   versions.  Both are plain macros: one of the arguments is evaluated
   twice, so do not pass expressions with side effects. */
#ifdef MIN
#undef MIN
#endif
#ifdef MAX
#undef MAX
#endif
#define MIN(a,b) (((a)>(b))?(b):(a))
#define MAX(a,b) (((a)<(b))?(b):(a))
/* Null-pointer tests; p is compared after conversion to void*. */
#define IS_NULL(p) (((void*)(p)) == (void*)0)
#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
/* Early-exit helpers: each expands to a bare `if` that returns from the
   ENCLOSING function (NULL or ONIGERR_MEMORY).  They are not wrapped in
   do { } while (0), so do not place one directly before an `else`. */
#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
#define NULL_UCHARP ((UChar* )0)
/* All bits set in an OnigCodePoint. */
#define ONIG_LAST_CODE_POINT (~((OnigCodePoint )0))
/* PLATFORM_GET_INC(val, p, type): load one `type` value from the byte
   position p into val, then advance p by sizeof(type). */
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
/* Direct dereference through a cast pointer; p may be unaligned for
   `type`.  Note that p is not parenthesised inside the cast, so pass a
   plain identifier. */
#define PLATFORM_GET_INC(val,p,type) do{\
val = *(type* )p;\
(p) += sizeof(type);\
} while(0)
#else
/* Portable variant: copies sizeof(type) bytes with xmemcpy(), so no
   alignment is required of p. */
#define PLATFORM_GET_INC(val,p,type) do{\
xmemcpy(&val, (p), sizeof(type));\
(p) += sizeof(type);\
} while(0)
/* The alignment helpers below exist only in this (#else) branch. */
/* sizeof(OnigCodePoint) */
#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
/* pad_size = number of bytes needed to bring addr up to the next
   multiple of WORD_ALIGNMENT_SIZE; 0 when addr is already aligned. */
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
(pad_size) = WORD_ALIGNMENT_SIZE \
- ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
} while (0)
/* Round addr up, in place, to the next multiple of WORD_ALIGNMENT_SIZE
   (unchanged when already aligned). */
#define ALIGNMENT_RIGHT(addr) do {\
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
(addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
/* stack pop level */
#define STACK_POP_LEVEL_FREE 0
#define STACK_POP_LEVEL_MEM_START 1
#define STACK_POP_LEVEL_ALL 2
/* optimize flags */
/* Mutually exclusive small integers (not bit flags).  In the names,
   BM stands for Boyer-Moore and IC for ignore case, as spelled out in
   the per-value comments. */
#define ONIG_OPTIMIZE_NONE 0
#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (applied to a multibyte string) */
#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
#define ONIG_OPTIMIZE_MAP 5 /* char map */
#define ONIG_OPTIMIZE_EXACT_BM_IC 6 /* BM (ignore case) */
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC 7 /* BM (applied to a multibyte string) (ignore case) */
/* bit status */
/*
 * BitStatusType is a word of per-index flags.  Index n has its own bit
 * while n < BIT_STATUS_BITS_NUM; every larger index shares bit 0
 * (BIT_STATUS_AT / BIT_STATUS_ON_AT) or is ignored
 * (BIT_STATUS_ON_AT_SIMPLE).
 *
 * All shifts are done on a BitStatusType value, so setting or testing
 * the top bit does not shift into the sign bit of an int, and every
 * macro argument is parenthesised.
 */
typedef unsigned int  BitStatusType;

/* number of distinct bits available in a BitStatusType */
#define BIT_STATUS_BITS_NUM          (sizeof(BitStatusType) * 8)
/* clear every flag / set every flag (stats must be an lvalue) */
#define BIT_STATUS_CLEAR(stats)      (stats) = 0
#define BIT_STATUS_ON_ALL(stats)     (stats) = ~((BitStatusType )0)
/* non-zero when the flag for index n is set */
#define BIT_STATUS_AT(stats,n) \
  ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & ((BitStatusType )1 << (n))) \
                                   : ((stats) & 1))

/* set the flag for index n; out-of-range indexes set the shared bit 0 */
#define BIT_STATUS_ON_AT(stats,n) do {\
  if ((n) < (int )BIT_STATUS_BITS_NUM) \
    (stats) |= ((BitStatusType )1 << (n));\
  else\
    (stats) |= 1;\
} while (0)

/* set the flag for index n; out-of-range indexes are silently ignored */
#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
  if ((n) < (int )BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
} while (0)
/* Largest positive int value, derived from SIZEOF_INT (not defined in
   this header -- presumably supplied by config.h, TODO confirm).  The
   expression has type unsigned long. */
#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
/* Value of a decimal / octal digit code; no range checking is done. */
#define DIGITVAL(code) ((code) - '0')
#define ODIGITVAL(code) DIGITVAL(code)
/* Value of a hexadecimal digit code.  Anything that is neither a digit
   nor upper case is treated as 'a'..'f', so the caller must have
   validated `code` beforehand. */
#define XDIGITVAL(enc,code) \
(ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
: (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
#define IS_FIND_CONDITION(option) ((option) & \
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
#define IS_ASCII_RANGE(option) ((option) & ONIG_OPTION_ASCII_RANGE)
#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
#define IS_WORD_BOUND_ALL_RANGE(option) ((option) & ONIG_OPTION_WORD_BOUND_ALL_RANGE)
#define IS_NEWLINE_CRLF(option) ((option) & ONIG_OPTION_NEWLINE_CRLF)
/* OP_SET_OPTION is required for these options.
#define IS_DYNAMIC_OPTION(option) \
(((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
*/
/* ignore-case and multibyte status are included in compiled code. */
#define IS_DYNAMIC_OPTION(option) 0
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
/* bitset */
/*
 * A BitSet holds one bit for each of the SINGLE_BYTE_SIZE (256) possible
 * single-byte values, packed into BITSET_SIZE elements ("rooms") of type
 * Bits.  Bits is unsigned int when PLATFORM_UNALIGNED_WORD_ACCESS is
 * defined and unsigned char otherwise, so BITS_IN_ROOM and BITSET_SIZE
 * depend on the platform while SIZE_BITSET (bytes) does not.
 */
#define BITS_PER_BYTE      8
#define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM       ((int )sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE        (SINGLE_BYTE_SIZE / BITS_IN_ROOM)

#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int   Bits;
#else
typedef unsigned char  Bits;
#endif
typedef Bits           BitSet[BITSET_SIZE];
typedef Bits*          BitSetRef;

#define SIZE_BITSET        (int )sizeof(BitSet)

/* Zero every room of bs.  The loop counter has a reserved-looking name
   so that an `i` appearing in the bs argument is not captured. */
#define BITSET_CLEAR(bs) do {\
  int bitset_clear_i_;\
  for (bitset_clear_i_ = 0; bitset_clear_i_ < BITSET_SIZE; bitset_clear_i_++) {\
    (bs)[bitset_clear_i_] = 0;\
  }\
} while (0)

/* room (array element) that holds the bit for pos */
#define BS_ROOM(bs,pos)            (bs)[(int )(pos) / BITS_IN_ROOM]
/* mask of the bit for pos inside its room; the shift is unsigned so that
   the top bit of a 32-bit room is well defined */
#define BS_BIT(pos)                (1U << ((int )(pos) % BITS_IN_ROOM))

/* non-zero when the bit for pos is set */
#define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos)     BS_ROOM(bs,pos) |= BS_BIT(pos)
#define BITSET_CLEAR_BIT(bs, pos)   BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
#define BITSET_INVERT_BIT(bs, pos)  BS_ROOM(bs,pos) ^= BS_BIT(pos)
/* bytes buffer */
/*
 * Growable byte buffer.
 *
 * The growing macros (BBUF_SIZE_INC, BBUF_EXPAND, BBUF_ENSURE_SIZE and
 * everything built on them) execute `return(ONIGERR_MEMORY)` in the
 * ENCLOSING function when xrealloc() fails, so they may only be used in
 * functions that return an int status.  On such a failure the buffer is
 * left exactly as it was: p still points to the old, valid storage and
 * alloc still describes it, so the owner can free it normally.
 *
 * BBUF_EXPAND and BBUF_ENSURE_SIZE grow by doubling and therefore require
 * a non-zero alloc (doubling 0 never reaches the requested size).
 */
typedef struct _BBuf {
  UChar* p;            /* storage, (re)allocated with xrealloc() */
  unsigned int used;   /* bytes in use: one past the highest offset written */
  unsigned int alloc;  /* bytes allocated at p */
} BBuf;

#define BBUF_INIT(buf,size)    onig_bbuf_init((BBuf* )(buf), (size))

/* grow the allocation by exactly inc bytes */
#define BBUF_SIZE_INC(buf,inc) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc + (inc);\
  UChar* bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
  if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_new_p_;\
  (buf)->alloc = bbuf_new_alloc_;\
} while (0)

/* double the allocation at least once, until it is >= low */
#define BBUF_EXPAND(buf,low) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc;\
  UChar* bbuf_new_p_;\
  do { bbuf_new_alloc_ *= 2; } while (bbuf_new_alloc_ < (unsigned int )(low));\
  bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
  if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_new_p_;\
  (buf)->alloc = bbuf_new_alloc_;\
} while (0)

/* make sure at least size bytes are allocated; no-op when already so */
#define BBUF_ENSURE_SIZE(buf,size) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc;\
  while (bbuf_new_alloc_ < (unsigned int )(size)) { bbuf_new_alloc_ *= 2; }\
  if ((buf)->alloc != bbuf_new_alloc_) {\
    UChar* bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
    if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bbuf_new_p_;\
    (buf)->alloc = bbuf_new_alloc_;\
  }\
} while (0)

/* copy n bytes to offset pos, growing the buffer and `used` as needed */
#define BBUF_WRITE(buf,pos,bytes,n) do{\
  int used = (pos) + (int )(n);\
  if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)

/* store a single byte at offset pos, growing the buffer as needed */
#define BBUF_WRITE1(buf,pos,byte) do{\
  int used = (pos) + 1;\
  if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
  (buf)->p[(pos)] = (UChar )(byte);\
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)

/* append at the current end (offset `used`) */
#define BBUF_ADD(buf,bytes,n)       BBUF_WRITE((buf),(buf)->used,(bytes),(n))
#define BBUF_ADD1(buf,byte)         BBUF_WRITE1((buf),(buf)->used,(byte))
#define BBUF_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BBUF_GET_OFFSET_POS(buf)    ((buf)->used)

/* from < to */
/* move n bytes from offset `from` up to offset `to`, growing if needed */
#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* from > to */
/* move n bytes from offset `from` down to offset `to`; `used` unchanged */
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* from > to */
/* move everything from offset `from` to the end down to offset `to` and
   shrink `used` by the distance moved */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* insert n bytes at offset pos, shifting the existing tail to the right;
   at or beyond the current end this is a plain write */
#define BBUF_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BBUF_WRITE(buf,pos,bytes,n);\
  }\
  else {\
    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
/* Anchor type bits.  Each value is a distinct power of two, so several
   of them can be OR-ed together into a mask. */
#define ANCHOR_BEGIN_BUF (1<<0)
#define ANCHOR_BEGIN_LINE (1<<1)
#define ANCHOR_BEGIN_POSITION (1<<2)
#define ANCHOR_END_BUF (1<<3)
#define ANCHOR_SEMI_END_BUF (1<<4)
#define ANCHOR_END_LINE (1<<5)
#define ANCHOR_WORD_BOUND (1<<6)
#define ANCHOR_NOT_WORD_BOUND (1<<7)
#define ANCHOR_WORD_BEGIN (1<<8)
#define ANCHOR_WORD_END (1<<9)
#define ANCHOR_PREC_READ (1<<10)
#define ANCHOR_PREC_READ_NOT (1<<11)
#define ANCHOR_LOOK_BEHIND (1<<12)
#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
#define ANCHOR_KEEP (1<<16)
/* operation code */
/* Only the first three enumerators carry explicit values; every later
   one takes the next consecutive value, so inserting or reordering
   entries renumbers all opcodes that follow. */
enum OpCode {
OP_FINISH = 0, /* matching process terminator (no more alternative) */
OP_END = 1, /* pattern code terminator (success end) */
OP_EXACT1 = 2, /* single byte, N = 1 */
OP_EXACT2, /* single byte, N = 2 */
OP_EXACT3, /* single byte, N = 3 */
OP_EXACT4, /* single byte, N = 4 */
OP_EXACT5, /* single byte, N = 5 */
OP_EXACTN, /* single byte */
OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
OP_EXACTMB2N, /* mb-length = 2 */
OP_EXACTMB3N, /* mb-length = 3 */
OP_EXACTMBN, /* other length */
OP_EXACT1_IC, /* single byte, N = 1, ignore case */
OP_EXACTN_IC, /* single byte, ignore case */
OP_CCLASS,
OP_CCLASS_MB,
OP_CCLASS_MIX,
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
OP_CCLASS_NODE, /* pointer to CClassNode node */
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
OP_ANYCHAR_STAR, /* ".*" */
OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
OP_ANYCHAR_STAR_PEEK_NEXT,
OP_ANYCHAR_ML_STAR_PEEK_NEXT,
OP_WORD,
OP_NOT_WORD,
OP_WORD_BOUND,
OP_NOT_WORD_BOUND,
OP_WORD_BEGIN,
OP_WORD_END,
OP_ASCII_WORD,
OP_NOT_ASCII_WORD,
OP_ASCII_WORD_BOUND,
OP_NOT_ASCII_WORD_BOUND,
OP_ASCII_WORD_BEGIN,
OP_ASCII_WORD_END,
OP_BEGIN_BUF,
OP_END_BUF,
OP_BEGIN_LINE,
OP_END_LINE,
OP_SEMI_END_BUF,
OP_BEGIN_POSITION,
OP_BEGIN_POS_OR_LINE, /* used for implicit anchor optimization */
OP_BACKREF1,
OP_BACKREF2,
OP_BACKREFN,
OP_BACKREFN_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
OP_MEMORY_END_PUSH, /* push back-tracker to stack */
OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
OP_MEMORY_END,
OP_MEMORY_END_REC, /* push marker to stack */
OP_KEEP,
OP_FAIL, /* pop stack and move */
OP_JUMP,
OP_PUSH,
OP_POP,
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
OP_REPEAT, /* {n,m} */
OP_REPEAT_NG, /* {n,m}? (non greedy) */
OP_REPEAT_INC,
OP_REPEAT_INC_NG, /* non greedy */
OP_REPEAT_INC_SG, /* search and get in stack */
OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
OP_NULL_CHECK_START, /* null loop checker start */
OP_NULL_CHECK_END, /* null loop checker end */
OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
OP_PUSH_POS, /* (?=...) start */
OP_POP_POS, /* (?=...) end */
OP_PUSH_POS_NOT, /* (?!...) start */
OP_FAIL_POS, /* (?!...) end */
OP_PUSH_STOP_BT, /* (?>...) start */
OP_POP_STOP_BT, /* (?>...) end */
OP_LOOK_BEHIND, /* (?<=...) start (no end opcode needed) */
OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
OP_CALL, /* \g<name> */
OP_RETURN,
OP_CONDITION,
/* The OP_STATE_CHECK* opcodes are always enumerated, but their SIZE_OP_*
   macros are defined only under USE_COMBINATION_EXPLOSION_CHECK. */
OP_STATE_CHECK_PUSH, /* combination explosion check and push */
OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
OP_STATE_CHECK, /* check only */
OP_STATE_CHECK_ANYCHAR_STAR,
OP_STATE_CHECK_ANYCHAR_ML_STAR,
/* no need: IS_DYNAMIC_OPTION() == 0 */
OP_SET_OPTION_PUSH, /* set option and push recover option */
OP_SET_OPTION /* set option */
};
/* Operand types as stored in the compiled byte code. */
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
typedef short int StateCheckNumType;
typedef void* PointerType;
/* Encoded size in bytes of the opcode and of each operand kind. */
#define SIZE_OPCODE 1
#define SIZE_RELADDR (int )sizeof(RelAddrType)
#define SIZE_ABSADDR (int )sizeof(AbsAddrType)
#define SIZE_LENGTH (int )sizeof(LengthType)
#define SIZE_MEMNUM (int )sizeof(MemNumType)
#define SIZE_STATE_CHECK_NUM (int )sizeof(StateCheckNumType)
#define SIZE_REPEATNUM (int )sizeof(RepeatNumType)
#define SIZE_OPTION (int )sizeof(OnigOptionType)
#define SIZE_CODE_POINT (int )sizeof(OnigCodePoint)
#define SIZE_POINTER (int )sizeof(PointerType)
/* Operand readers: each loads one operand of the given type from p via
   PLATFORM_GET_INC and advances p past it. */
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
/* code point's address must be aligned address. */
/* Unlike the GET_*_INC readers this dereferences p directly on every
   platform and does not advance p. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
/* read one byte from p and advance p by one */
#define GET_BYTE_INC(byte,p) do{\
byte = *(p);\
(p)++;\
} while(0)
/* op-code + arg size */
/* Encoded length in bytes of each instruction: one opcode byte plus the
   sizes of its operands.  A bare `+ 1` stands for a single operand byte. */
#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP SIZE_OPCODE
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_POS SIZE_OPCODE
#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP_POS SIZE_OPCODE
#define SIZE_OP_FAIL_POS SIZE_OPCODE
#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_FAIL SIZE_OPCODE
#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE
#define SIZE_OP_CONDITION (SIZE_OPCODE + SIZE_MEMNUM + SIZE_RELADDR)
/* Sizes of the state-check instructions exist only when the
   combination-explosion check is compiled in. */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#endif
/* Accessors for the fields of a syntax's meta_char_table. */
#define MC_ESC(syn) (syn)->meta_char_table.esc
#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time
#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time
#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime
/* True when `code` is the syntax's escape character and the syntax does
   not have ONIG_SYN_OP2_INEFFECTIVE_ESCAPE set. */
#define IS_MC_ESC_CODE(code, syn) \
((code) == MC_ESC(syn) && \
!IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
/* Pre-combined ONIG_SYN_OP_* operator sets. */
#define SYN_POSIX_COMMON_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
ONIG_SYN_OP_DECIMAL_BACKREF | \
ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
ONIG_SYN_OP_LINE_ANCHOR | \
ONIG_SYN_OP_ESC_CONTROL_CHARS )
#define SYN_GNU_REGEX_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
ONIG_SYN_OP_VBAR_ALT | \
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
ONIG_SYN_OP_QMARK_ZERO_ONE | \
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
ONIG_SYN_OP_ESC_W_WORD | \
ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
ONIG_SYN_OP_LINE_ANCHOR )
/* Pre-combined ONIG_SYN_* behaviour flags. */
#define SYN_GNU_REGEX_BV \
( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
/* Generic flag helpers operating on the `flags` member of a CClassNode. */
#define NCCLASS_FLAGS(cc) ((cc)->flags)
#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag))
#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag))
#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0)
/* cclass node */
#define FLAG_NCCLASS_NOT (1<<0)
#define FLAG_NCCLASS_SHARE (1<<1)
/* Named shortcuts for the two flags above. */
#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
/* Header placed first in node structures (see CClassNode below); it
   carries only the node type. */
typedef struct {
int type;
/* struct _Node* next; */
/* unsigned int flags; */
} NodeBase;
/* Character class node. */
typedef struct {
NodeBase base;
unsigned int flags; /* FLAG_NCCLASS_* bits */
BitSet bs; /* one bit per single-byte value */
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
/* Index of an entry in the match stack. */
typedef intptr_t OnigStackIndex;
/* One entry of the match stack: a `type` tag followed by a union with
   one member struct per kind of entry. */
typedef struct _OnigStackType {
unsigned int type;
union {
struct {
UChar *pcode; /* byte code position */
UChar *pstr; /* string position */
UChar *pstr_prev; /* previous char position of pstr */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
unsigned int state_check;
#endif
UChar *pkeep; /* keep pattern position */
} state;
struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
UChar *pcode; /* byte code position (head of repeated target) */
int num; /* repeat id */
} repeat;
struct {
OnigStackIndex si; /* index of stack */
} repeat_inc;
struct {
int num; /* memory num */
UChar *pstr; /* start/end position */
/* Following information is set, if this stack type is MEM-START */
OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
int num; /* null check id */
UChar *pstr; /* start position */
} null_check;
#ifdef USE_SUBEXP_CALL
struct {
UChar *ret_addr; /* byte code position */
/* NOTE(review): the comment on `num` below is identical to the one in
   null_check above and may have been copied -- confirm what the call
   frame actually stores here. */
int num; /* null check id */
UChar *pstr; /* string position */
} call_frame;
#endif
} u;
} OnigStackType;
/* Arguments and working state for a match/search run.  Some members
   exist only when the corresponding USE_* feature is compiled in. */
typedef struct {
void* stack_p;
size_t stack_n;
OnigOptionType options;
OnigRegion* region;
const UChar* start; /* search start position */
const UChar* gpos; /* global position (for \G: BEGIN_POSITION) */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
OnigPosition best_len; /* for ONIG_OPTION_FIND_LONGEST */
UChar* best_s;
#endif
#ifdef USE_COMBINATION_EXPLOSION_CHECK
void* state_check_buff;
int state_check_buff_size;
#endif
} OnigMatchArg;
/* True when `code` is an ASCII code point that is also a word character
   in encoding `enc`. */
#define IS_CODE_SB_WORD(enc,code) \
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
/* Debug-only declarations; ONIG_DEBUG is defined near the top of this
   header whenever one of the ONIG_DEBUG_* switches is enabled. */
#ifdef ONIG_DEBUG
/* One row of the opcode description table OnigOpInfo. */
typedef struct {
short int opcode;
const char* name;
short int arg_type;
} OnigOpInfoType;
extern OnigOpInfoType OnigOpInfo[];
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
extern void onig_print_statistics P_((FILE* f));
#endif
#endif
/* Library-internal functions shared between translation units.  P_ and
   PV_ are prototype wrapper macros defined elsewhere (not in this
   header). */
extern UChar* onig_error_code_to_format P_((OnigPosition code));
extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
extern int onig_bbuf_init P_((BBuf* buf, OnigDistance size));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
/* strend hash */
/* Hash table keyed by a (start, end) byte range rather than by a
   NUL-terminated string. */
typedef void hash_table_type;
typedef uintptr_t hash_data_type;
extern hash_table_type* onig_st_init_strend_table_with_size P_((int size));
extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
/* encoding property management */
/* Adds one property via onigenc_property_list_add_property().  The
   expansion site must provide a variable `r`, a label `end` (jumped to
   when r != 0) and the objects PropertyNameTable, PropertyList,
   PropertyListNum and PropertyListSize. */
#define PROPERTY_LIST_ADD_PROP(Name, CR) \
r = onigenc_property_list_add_property((UChar* )Name, CR,\
&PropertyNameTable, &PropertyList, &PropertyListNum,\
&PropertyListSize);\
if (r != 0) goto end
/* Runs onigenc_property_list_init(init_property_list) when
   PropertyInited is 0 and returns its result from the ENCLOSING function
   on failure.  Expands to a bare `if` block; PropertyInited and
   init_property_list must be in scope at the expansion site. */
#define PROPERTY_LIST_INIT_CHECK \
if (PropertyInited == 0) {\
int r = onigenc_property_list_init(init_property_list);\
if (r != 0) return r;\
}
extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE));
extern size_t onig_memsize P_((const regex_t *reg));
extern size_t onig_region_memsize P_((const struct re_registers *regs));
#endif /* REGINT_H */

6243
src/Onigmo/regparse.c Normal file

File diff suppressed because it is too large Load Diff

356
src/Onigmo/regparse.h Normal file
View File

@ -0,0 +1,356 @@
#ifndef REGPARSE_H
#define REGPARSE_H
/**********************************************************************
regparse.h - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/* node type */
/* Small consecutive integers, so that NTYPE2BIT() can turn each type
   into its own bit. */
#define NT_STR 0
#define NT_CCLASS 1
#define NT_CTYPE 2
#define NT_CANY 3
#define NT_BREF 4
#define NT_QTFR 5
#define NT_ENCLOSE 6
#define NT_ANCHOR 7
#define NT_LIST 8
#define NT_ALT 9
#define NT_CALL 10
/* node type bit */
#define NTYPE2BIT(type) (1<<(type))
#define BIT_NT_STR NTYPE2BIT(NT_STR)
#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
/* True for the node types STR, CCLASS, CTYPE, CANY and BREF. */
#define IS_NODE_TYPE_SIMPLE(type) \
((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
BIT_NT_CANY | BIT_NT_BREF)) != 0)
/* Read / write the type tag of a node. */
#define NTYPE(node) ((node)->u.base.type)
#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
/* Typed views of a node: each yields a pointer to the corresponding
   member of the node's union `u`.  No check is made that the node
   actually has that type. */
#define NSTR(node) (&((node)->u.str))
#define NCCLASS(node) (&((node)->u.cclass))
#define NCTYPE(node) (&((node)->u.ctype))
#define NBREF(node) (&((node)->u.bref))
#define NQTFR(node) (&((node)->u.qtfr))
#define NENCLOSE(node) (&((node)->u.enclose))
#define NANCHOR(node) (&((node)->u.anchor))
#define NCONS(node) (&((node)->u.cons))
#define NCALL(node) (&((node)->u.call))
/* car / cdr members of a cons node */
#define NCAR(node) (NCONS(node)->car)
#define NCDR(node) (NCONS(node)->cdr)
/* Combined anchor masks built from the ANCHOR_* bits of regint.h. */
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
/* Enclose node kinds (bit values). */
#define ENCLOSE_MEMORY (1<<0)
#define ENCLOSE_OPTION (1<<1)
#define ENCLOSE_STOP_BACKTRACK (1<<2)
#define ENCLOSE_CONDITION (1<<3)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
/* String node flag bits, kept in u.str.flag. */
#define NSTR_RAW (1<<0) /* by backslashed number */
#define NSTR_AMBIG (1<<1)
#define NSTR_DONT_GET_OPT_INFO (1<<2)
/* Length in bytes of a string node (end - s), as an OnigDistance. */
#define NSTRING_LEN(node) (OnigDistance )((node)->u.str.end - (node)->u.str.s)
/* Set / clear / test the string node flag bits. */
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
(node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
(((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
/* BACKREFS_P(br): pointer to the back-reference number array of a BRefNode.
 * Yields back_dynamic when it is non-NULL and the in-node back_static array
 * otherwise.  The expansion is a pure expression (no trailing semicolon), so
 * it can be used in initializers, conditions and larger expressions as well
 * as in plain `p = BACKREFS_P(br);' statements. */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
/* Emptiness classification of a quantifier target (presumably the values
   stored in QtfrNode.target_empty_info -- confirm against regcomp.c). */
#define NQ_TARGET_ISNOT_EMPTY 0
#define NQ_TARGET_IS_EMPTY 1
#define NQ_TARGET_IS_EMPTY_MEM 2
#define NQ_TARGET_IS_EMPTY_REC 3
/* status bits */
/* The NST_* bits are tested against the `state' member of enclose, call,
   back-reference and quantifier nodes by the predicates further down. */
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
#define NST_CLEN_FIXED (1<<2)
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
#define NST_MEM_BACKREFED (1<<5)
#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
#define NST_RECURSION (1<<7)
#define NST_CALLED (1<<8)
#define NST_ADDR_FIXED (1<<9)
#define NST_NAMED_GROUP (1<<10)
#define NST_NAME_REF (1<<11)
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
#define NST_NEST_LEVEL (1<<13)
#define NST_BY_NUMBER (1<<14) /* {n,m} */
/* SET/CLEAR take a Node pointer and expand to a bare compound assignment
   (statement use only); the IS_* predicates take a pointer to the specific
   node struct (EncloseNode*, CallNode*, BRefNode*, QtfrNode*). */
#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
(((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define IS_ENCLOSE_NAME_REF(en) (((en)->state & NST_NAME_REF) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
/* Sentinel reference number for a call node (presumably "not resolved yet"
   -- confirm at the use sites). */
#define CALLNODE_REFNUM_UNDEF -1
/* String node: a run of bytes delimited by [s, end). */
typedef struct {
NodeBase base;
UChar* s; /* first byte of the string */
UChar* end; /* NSTRING_LEN() is computed as end - s */
unsigned int flag; /* NSTR_* bits (RAW, AMBIG, DONT_GET_OPT_INFO) */
int capa; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STR_BUF_SIZE]; /* in-node storage, used when capa is 0 */
} StrNode;
/* Quantifier (repeat) node applied to `target'. */
typedef struct {
NodeBase base;
int state; /* NST_* bits; see IS_QUANTIFIER_IN_REPEAT / IS_QUANTIFIER_BY_NUMBER */
struct _Node* target;
int lower; /* presumably the minimum repeat count -- confirm */
int upper; /* presumably the maximum repeat count -- confirm */
int greedy;
int target_empty_info; /* presumably one of NQ_TARGET_* -- confirm */
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
#endif
} QtfrNode;
/* Enclose node: a group wrapping `target'. */
typedef struct {
NodeBase base;
int state; /* NST_* bits; changed via SET_/CLEAR_ENCLOSE_STATUS */
int type; /* presumably one of ENCLOSE_* -- confirm */
int regnum;
OnigOptionType option;
struct _Node* target;
AbsAddrType call_addr;
/* for multiple call reference */
OnigDistance min_len; /* min length (byte) */
OnigDistance max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_node_left() */
} EncloseNode;
/* Types below exist only when subexpression calls are compiled in. */
#ifdef USE_SUBEXP_CALL
/* One pending entry: an offset paired with the node it refers to. */
typedef struct {
int offset;
struct _Node* target;
} UnsetAddr;
/* Array of UnsetAddr entries; `num' and `alloc' are presumably the used
   and allocated entry counts -- confirm against regcomp.c. */
typedef struct {
int num;
int alloc;
UnsetAddr* us;
} UnsetAddrList;
/* Call node: a subexpression call identified by group number and/or by the
   name spanning [name, name_end). */
typedef struct {
NodeBase base;
int state; /* NST_* bits; see SET_CALL_RECURSION / IS_CALL_NAME_REF */
int group_num;
UChar* name;
UChar* name_end;
struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
UnsetAddrList* unset_addr_list;
} CallNode;
#endif
/* Back-reference node.  The referenced group numbers are read through
   BACKREFS_P(), which selects back_dynamic when it is non-NULL and
   back_static otherwise. */
typedef struct {
NodeBase base;
int state; /* NST_* bits; see IS_BACKREF_NAME_REF / IS_BACKREF_NEST_LEVEL */
int back_num;
int back_static[NODE_BACKREFS_SIZE]; /* in-node storage */
int* back_dynamic; /* NULL while back_static is in use */
int nest_level;
} BRefNode;
/* Anchor node.  `type' presumably holds ANCHOR_* values (compare the
   ANCHOR_*_MASK macros) -- confirm against the parser. */
typedef struct {
NodeBase base;
int type;
struct _Node* target;
int char_len;
int ascii_range;
} AnchorNode;
/* Cons cell shared by list and alternation nodes; accessed via NCAR() and
   NCDR(). */
typedef struct {
NodeBase base;
struct _Node* car;
struct _Node* cdr;
} ConsAltNode;
/* Character-type node.  `not' presumably negates the type test and
   `ascii_range' presumably restricts it to ASCII -- confirm against the
   compiler. */
typedef struct {
NodeBase base;
int ctype;
int not;
int ascii_range;
} CtypeNode;
/* Parse-tree node: a union of every node kind.  Each variant starts with a
   NodeBase member, and NTYPE() reads the type tag through u.base; the
   N<KIND>() macros return a pointer to the matching variant. */
typedef struct _Node {
union {
NodeBase base;
StrNode str;
CClassNode cclass;
QtfrNode qtfr;
EncloseNode enclose;
BRefNode bref;
AnchorNode anchor;
ConsAltNode cons;
CtypeNode ctype;
#ifdef USE_SUBEXP_CALL
CallNode call;
#endif
} u;
} Node;
#define NULL_NODE ((Node* )0)
/* Number of Node* slots held directly inside ScanEnv. */
#define SCANENV_MEMNODES_SIZE 8
/* Node* array of a ScanEnv: mem_nodes_dynamic when it is non-NULL,
   otherwise the in-struct mem_nodes_static array. */
#define SCANENV_MEM_NODES(senv) \
(IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
/* Per-parse state passed to the parser (see onig_parse_make_tree). */
typedef struct {
OnigOptionType option;
OnigCaseFoldType case_fold_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
BitStatusType capture_history;
BitStatusType bt_mem_start;
BitStatusType bt_mem_end;
BitStatusType backrefed_mem;
UChar* pattern;
UChar* pattern_end;
UChar* error;
UChar* error_end;
regex_t* reg; /* for reg->names only */
int num_call;
#ifdef USE_SUBEXP_CALL
UnsetAddrList* unset_addr_list;
#endif
int num_mem;
#ifdef USE_NAMED_GROUP
int num_named;
#endif
int mem_alloc;
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; /* in-struct storage */
Node** mem_nodes_dynamic; /* NULL while mem_nodes_static is in use */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int num_comb_exp_check;
int comb_exp_max_regnum;
int curr_max_regnum;
int has_recursion;
#endif
} ScanEnv;
/* Non-zero when any bit of the mask is set in the syntax's `op', `op2' or
   `behavior' member respectively. */
#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
#ifdef USE_NAMED_GROUP
typedef struct {
int new_val;
} GroupNumRemap;
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
#endif
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_enclose P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
extern Node* onig_node_list_add P_((Node* list, Node* x));
extern Node* onig_node_new_alt P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
extern int onig_free_node_list P_((void));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
extern int onig_print_names(FILE*, regex_t*);
#endif
#endif
#endif /* REGPARSE_H */

98
src/Onigmo/regposerr.c Normal file
View File

@ -0,0 +1,98 @@
/**********************************************************************
regposerr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#include "onigposix.h"
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
/* ARG_UNUSED marks a deliberately unused parameter: it expands to the GNU
   `unused' attribute under __GNUC__ and to nothing elsewhere. */
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
#else
# define ARG_UNUSED
#endif
/* Message table indexed directly by POSIX error code.  Slot 0 is a NULL
   placeholder: regerror() only indexes the table for codes greater than 0
   and below the table length.  The trailing comment on each entry names the
   REG_* code the slot is meant for, so the order must follow those codes. */
static char* ESTRING[] = {
NULL,
"failed to match", /* REG_NOMATCH */
"Invalid regular expression", /* REG_BADPAT */
"invalid collating element referenced", /* REG_ECOLLATE */
"invalid character class type referenced", /* REG_ECTYPE */
"bad backslash-escape sequence", /* REG_EESCAPE */
"invalid back reference number", /* REG_ESUBREG */
"imbalanced [ and ]", /* REG_EBRACK */
"imbalanced ( and )", /* REG_EPAREN */
"imbalanced { and }", /* REG_EBRACE */
"invalid repeat range {n,m}", /* REG_BADBR */
"invalid range", /* REG_ERANGE */
"Out of memory", /* REG_ESPACE */
"? * + not preceded by valid regular expression", /* REG_BADRPT */
/* Extended errors */
"internal error", /* REG_EONIG_INTERNAL */
"invalid wide char value", /* REG_EONIG_BADWC */
"invalid argument", /* REG_EONIG_BADARG */
"multi-thread error" /* REG_EONIG_THREAD */
};
#include <stdio.h>
/*
 * regerror - translate a POSIX regex error code into a message.
 *
 * posix_ecode  code returned by regcomp()/regexec(); 0 yields "".
 * reg          unused.
 * buf, size    destination buffer; when buf is non-NULL and size is non-zero
 *              the message is copied, truncated to size - 1 bytes, and always
 *              NUL-terminated.
 *
 * Returns the size needed for the complete message, including the
 * terminating NUL, regardless of how much was copied.
 */
extern size_t
regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
         size_t size)
{
  char fallback[35];   /* holds "undefined error code (<int>)" */
  char* msg;
  size_t needed;
  const int table_len = (int )(sizeof(ESTRING) / sizeof(ESTRING[0]));

  if (posix_ecode == 0) {
    msg = "";
  }
  else if (posix_ecode > 0 && posix_ecode < table_len) {
    msg = ESTRING[posix_ecode];
  }
  else {
    /* negative or beyond the table: report the raw number */
    sprintf(fallback, "undefined error code (%d)", posix_ecode);
    msg = fallback;
  }

  /* strlen() is fine here: every message is plain ASCII. */
  needed = strlen(msg) + 1;

  if (size > 0 && buf != NULL) {
    strncpy(buf, msg, size - 1);
    buf[size - 1] = '\0';
  }
  return needed;
}

304
src/Onigmo/regposix.c Normal file
View File

@ -0,0 +1,304 @@
/**********************************************************************
regposix.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define regex_t onig_regex_t
#include "regint.h"
#undef regex_t
#include "onigposix.h"
/* ONIG_C views the POSIX wrapper's `onig' member as the native compiled
   regex pointer; PONIG_C yields the address of that member so onig_new()
   can store into it. */
#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig))
/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */
/* ENC_STRING_LEN stores the byte length of the NUL-terminated string `s'
   into `len' (as int).  When the encoding's minimum character length is 1
   it scans for the first zero byte; otherwise it defers to
   onigenc_str_bytelen_null(). */
#define ENC_STRING_LEN(enc,s,len) do { \
if (ONIGENC_MBC_MINLEN(enc) == 1) { \
UChar* tmps = (UChar* )(s); \
while (*tmps != 0) tmps++; \
len = (int )(tmps - (UChar* )(s)); \
} \
else { \
len = onigenc_str_bytelen_null(enc, (UChar* )s); \
} \
} while(0)
/* One row of the native-to-POSIX error translation table used by
   onig2posix_error_code(). */
typedef struct {
int onig_err; /* native (negative) error code */
int posix_err; /* REG_* code reported to POSIX callers */
} O2PERR;
/*
 * Translate a native return code into a POSIX REG_* code.
 * Any non-negative code maps to 0.  A negative code is looked up in the
 * table below; a code missing from the table is reported as
 * REG_EONIG_INTERNAL.
 */
static int
onig2posix_error_code(int code)
{
  static const O2PERR o2p[] = {
    { ONIG_MISMATCH, REG_NOMATCH },
    { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
    { ONIGERR_MEMORY, REG_ESPACE },
    { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
    { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
    { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
    { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
    { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
    { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
    { ONIGERR_DEFAULT_ENCODING_IS_NOT_SET, REG_EONIG_BADARG },
    { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
    { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
    { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
    { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
    { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
    { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
    { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
    { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
    { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
    { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
    { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
    { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
    { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
    { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
    { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
    { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
    { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
    { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
    { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
    { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
    { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
    { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
    { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
    { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
    { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
    { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
    { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
    { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
    { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
    { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
    { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
    { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
    { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
    { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
    { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
    { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
    { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },
    { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
    { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
    { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
    { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
    { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
    { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
    { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
    { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
    { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
    { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
    { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
    { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
  };
  const O2PERR* const table_end = o2p + (sizeof(o2p) / sizeof(o2p[0]));
  const O2PERR* row;

  /* Non-negative codes are not errors. */
  if (code >= 0) return 0;

  /* First matching row wins, in table order. */
  for (row = o2p; row != table_end; row++) {
    if (row->onig_err == code)
      return row->posix_err;
  }

  return REG_EONIG_INTERNAL;  /* but, unknown error code */
}
/*
 * regcomp - compile the NUL-terminated `pattern' into `reg'.
 *
 * Without REG_EXTENDED the POSIX basic syntax is used; with it, the current
 * default syntax.  Compile options start from the chosen syntax's own
 * options and are then adjusted for REG_ICASE and REG_NEWLINE.  The pattern
 * length is measured in the default character encoding.
 *
 * Returns 0 on success (reg->re_nsub is then set to the number of capture
 * groups), otherwise a REG_* error code.
 */
extern int
regcomp(regex_t* reg, const char* pattern, int posix_options)
{
  OnigSyntaxType* syntax;
  OnigOptionType options;
  int pattern_len;
  int status;

  if ((posix_options & REG_EXTENDED) != 0)
    syntax = OnigDefaultSyntax;
  else
    syntax = ONIG_SYNTAX_POSIX_BASIC;

  options = syntax->options;
  if ((posix_options & REG_ICASE) != 0) {
    ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
  }
  if ((posix_options & REG_NEWLINE) != 0) {
    ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
    ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
  }

  /* Remembered so that regexec() can honour REG_NOSUB later. */
  reg->comp_options = posix_options;

  ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, pattern_len);
  status = onig_new(PONIG_C(reg), (UChar* )pattern,
                    (UChar* )(pattern + pattern_len),
                    options, OnigEncDefaultCharEncoding, syntax,
                    (OnigErrorInfo* )NULL);
  if (status == ONIG_NORMAL) {
    reg->re_nsub = ONIG_C(reg)->num_mem;
    return 0;
  }

  return onig2posix_error_code(status);
}
/*
 * regexec - match the NUL-terminated `str' against the compiled `reg'.
 *
 * reg            pattern previously compiled by regcomp().
 * str            subject text; its byte length is measured with
 *                ENC_STRING_LEN using the encoding stored in the compiled
 *                pattern.
 * nmatch/pmatch  caller's array receiving group offsets.  Nothing is
 *                written when nmatch is 0 or the pattern was compiled with
 *                REG_NOSUB.
 * posix_options  REG_NOTBOL / REG_NOTEOL are forwarded to the search.
 *
 * Returns 0 on a match, REG_NOMATCH when nothing matches, REG_ESPACE when
 * the temporary region cannot be allocated, or another REG_* code translated
 * from the native error.
 *
 * On a match, pmatch[] entries beyond the pattern's last group are set to
 * ONIG_REGION_NOTPOS, as POSIX requires for unused elements.  On
 * REG_NOMATCH all nmatch entries are set to ONIG_REGION_NOTPOS.
 */
extern int
regexec(regex_t* reg, const char* str, size_t nmatch,
        regmatch_t pmatch[], int posix_options)
{
  int r, len;
  size_t i, ngroups;
  UChar* end;
  regmatch_t* pm;
  OnigOptionType options;

  options = ONIG_OPTION_POSIX_REGION;
  if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
  if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;

  /* Whole match plus every capture group.  Kept as size_t so that the
     comparison with nmatch is not distorted by narrowing nmatch to int. */
  ngroups = (size_t )ONIG_C(reg)->num_mem + 1;

  if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
    pm = (regmatch_t* )NULL;
    nmatch = 0;
  }
  else if (nmatch < ngroups) {
    /* Caller's array is too short for a full region: search into a
       scratch array and copy back only the first nmatch entries. */
    pm = (regmatch_t* )xmalloc(sizeof(regmatch_t) * ngroups);
    if (pm == NULL)
      return REG_ESPACE;
  }
  else {
    pm = pmatch;
  }

  ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
  end = (UChar* )(str + len);
  r = (int )onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
                        (OnigRegion* )pm, options);

  if (r >= 0) {
    r = 0; /* Match */
    if (pm != pmatch && pm != NULL) {
      xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
    }
    else {
      /* Searched directly into pmatch (or nmatch is 0, so this loop does
         nothing): mark the entries past the last group as unused. */
      for (i = ngroups; i < nmatch; i++)
        pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
    }
  }
  else if (r == ONIG_MISMATCH) {
    r = REG_NOMATCH;
    for (i = 0; i < nmatch; i++)
      pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
  }
  else {
    r = onig2posix_error_code(r);
  }

  /* Release the scratch region if one was allocated. */
  if (pm != pmatch && pm != NULL)
    xfree(pm);

  return r;
}
/*
 * regfree - release the compiled pattern owned by `reg'.
 *
 * The handle is cleared afterwards so that `reg' no longer holds a dangling
 * pointer; the regex_t object itself is not freed and may be passed to
 * regcomp() again.
 */
extern void
regfree(regex_t* reg)
{
  onig_free(ONIG_C(reg));
  reg->onig = (void* )0;
}
/*
 * reg_set_encoding - choose the default character encoding from a
 * REG_POSIX_ENCODING_* selector.  An unrecognised selector leaves the
 * current default unchanged.
 */
extern void
reg_set_encoding(int mb_code)
{
  OnigEncoding selected;

  if (mb_code == REG_POSIX_ENCODING_ASCII)
    selected = ONIG_ENCODING_ASCII;
  else if (mb_code == REG_POSIX_ENCODING_EUC_JP)
    selected = ONIG_ENCODING_EUC_JP;
  else if (mb_code == REG_POSIX_ENCODING_SJIS)
    selected = ONIG_ENCODING_SJIS;
  else if (mb_code == REG_POSIX_ENCODING_UTF8)
    selected = ONIG_ENCODING_UTF8;
  else if (mb_code == REG_POSIX_ENCODING_UTF16_BE)
    selected = ONIG_ENCODING_UTF16_BE;
  else if (mb_code == REG_POSIX_ENCODING_UTF16_LE)
    selected = ONIG_ENCODING_UTF16_LE;
  else
    return;  /* unknown selector: nothing to do */

  onigenc_set_default_encoding(selected);
}
/*
 * reg_name_to_group_numbers - POSIX-wrapper front end for
 * onig_name_to_group_numbers(): unwraps the native regex from `reg' and
 * forwards the name range and output pointer unchanged, returning the
 * callee's result.
 */
extern int
reg_name_to_group_numbers(regex_t* reg,
  const unsigned char* name, const unsigned char* name_end, int** nums)
{
  onig_regex_t* native = ONIG_C(reg);

  return onig_name_to_group_numbers(native, name, name_end, nums);
}
/* Context handed to i_wrapper() through onig_foreach_name()'s user
   argument: the caller's callback, the POSIX regex_t to pass to it, and the
   caller's own argument. */
typedef struct {
int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
regex_t* reg;
void* arg;
} i_wrap;
/*
 * Adapter given to onig_foreach_name().  It ignores the native regex
 * pointer and calls the user's callback with the POSIX regex_t and user
 * argument stored in the i_wrap context, returning the callback's result.
 */
static int
i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
          onig_regex_t* reg ARG_UNUSED, void* arg)
{
  const i_wrap* ctx = (const i_wrap* )arg;

  return ctx->func(name, name_end, ng, gs, ctx->reg, ctx->arg);
}
/*
 * reg_foreach_name - hand `func' to onig_foreach_name() for the names of
 * `reg'.  The callback receives the POSIX regex_t (not the native object)
 * and `arg'.  Returns whatever onig_foreach_name() returns.
 */
extern int
reg_foreach_name(regex_t* reg,
  int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
  void* arg)
{
  i_wrap ctx;

  /* Bundle everything i_wrapper() needs to re-dispatch to `func'. */
  ctx.reg  = reg;
  ctx.arg  = arg;
  ctx.func = func;

  return onig_foreach_name(ONIG_C(reg), i_wrapper, &ctx);
}
/*
 * reg_number_of_names - POSIX-wrapper front end for onig_number_of_names();
 * returns that function's result for the native regex held by `reg'.
 */
extern int
reg_number_of_names(regex_t* reg)
{
  onig_regex_t* native = ONIG_C(reg);

  return onig_number_of_names(native);
}

387
src/Onigmo/regsyntax.c Normal file
View File

@ -0,0 +1,387 @@
/**********************************************************************
regsyntax.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/* "As is" syntax: no first-set operators, only
   ONIG_SYN_OP2_INEFFECTIVE_ESCAPE in the second set, no behavior flags and
   no options.  Initializer order is assumed to be op, op2, behavior,
   options, meta-char table -- confirm against the OnigSyntaxType
   declaration. */
OnigSyntaxType OnigSyntaxASIS = {
0
, ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
, 0
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* POSIX basic syntax: the common POSIX operators plus backslash-escaped
   group parentheses and interval braces.  Default options are
   SINGLELINE | MULTILINE.  Initializer order is assumed to be op, op2,
   behavior, options, meta-char table -- confirm against OnigSyntaxType. */
OnigSyntaxType OnigSyntaxPosixBasic = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_BRACE_INTERVAL )
, 0
, 0
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* POSIX extended syntax: the common POSIX operators plus unescaped group
   parentheses, interval braces, '+', '?' and '|'.  Default options are
   SINGLELINE | MULTILINE.  Initializer order is assumed to be op, op2,
   behavior, options, meta-char table -- confirm against OnigSyntaxType. */
OnigSyntaxType OnigSyntaxPosixExtended = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
ONIG_SYN_OP_BRACE_INTERVAL |
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
, 0
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* Emacs syntax: groups, alternation and intervals are backslash-escaped;
   '*', '+' and '?' are not.  GNU buffer anchors are enabled in the second
   operator set, and empty ranges in character classes are allowed.
   Initializer order is assumed to be op, op2, behavior, options, meta-char
   table -- confirm against OnigSyntaxType. */
OnigSyntaxType OnigSyntaxEmacs = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
ONIG_SYN_OP_ESC_BRACE_INTERVAL |
ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* grep syntax: groups, alternation, intervals, '+' and '?' are
   backslash-escaped; '*' is not.  Word escapes and decimal back-references
   are enabled.  Behavior allows empty ranges in character classes and sets
   ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC.  Initializer order is assumed to be
   op, op2, behavior, options, meta-char table -- confirm against
   OnigSyntaxType. */
OnigSyntaxType OnigSyntaxGrep = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
, 0
, ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* GNU regex syntax: exactly the SYN_GNU_REGEX_OP operator set and
   SYN_GNU_REGEX_BV behavior set, with nothing from the second operator set
   and no options.  Initializer order is assumed to be op, op2, behavior,
   options, meta-char table -- confirm against OnigSyntaxType. */
OnigSyntaxType OnigSyntaxGnuRegex = {
SYN_GNU_REGEX_OP
, 0
, SYN_GNU_REGEX_BV
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* Java syntax: the GNU regex operators with
   ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out, plus non-greedy '?',
   control, octal and two-digit hex escapes.  The second set adds \Q quoting,
   (?...) groups, Perl-style options, possessive repeats, character-class
   set operations, \v, \uHHHH and \p{...}.  Default options are SINGLELINE,
   ASCII_RANGE and WORD_BOUND_ALL_RANGE.  Initializer order is assumed to be
   op, op2, behavior, options, meta-char table -- confirm against
   OnigSyntaxType. */
OnigSyntaxType OnigSyntaxJava = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_ASCII_RANGE |
ONIG_OPTION_WORD_BOUND_ALL_RANGE )
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* Perl 5.8 */
/* The GNU regex operators with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked
   out, plus non-greedy '?', octal, hex (two-digit and braced) and control
   escapes.  The second set adds \Q quoting, (?...) groups, Perl-style
   options, \p{...} with '^' negation, \X and (?(cond)...).  Default option
   is SINGLELINE.  Initializer order is assumed to be op, op2, behavior,
   options, meta-char table -- confirm against OnigSyntaxType. */
OnigSyntaxType OnigSyntaxPerl58 = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)
, SYN_GNU_REGEX_BV
, ONIG_OPTION_SINGLELINE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* Perl 5.8 + named group */
/* Same first operator set as OnigSyntaxPerl58.  The second set additionally
   enables (?<name>...) groups, \k<name> back-references and \g<name> calls;
   the behavior set additionally enables ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
   and ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME.  Initializer order is
   assumed to be op, op2, behavior, options, meta-char table -- confirm
   against OnigSyntaxType. */
OnigSyntaxType OnigSyntaxPerl58_NG = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
, ONIG_OPTION_SINGLELINE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* Perl 5.10+ */
/* Same first operator set as OnigSyntaxPerl58.  On top of that table's
   second set this one enables possessive repeats and intervals, \R, \K,
   (?n)-style subexpression calls, \g{...} back-references, (?P<name>...)
   and (?<name>...) groups and \k<name> back-references.  Default options
   are SINGLELINE | CAPTURE_GROUP.  Initializer order is assumed to be op,
   op2, behavior, options, meta-char table -- confirm against
   OnigSyntaxType. */
OnigSyntaxType OnigSyntaxPerl = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL |
ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK |
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
ONIG_SYN_OP2_QMARK_SUBEXP_CALL |
ONIG_SYN_OP2_ESC_G_BRACE_BACKREF |
ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_CAPTURE_GROUP )
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* Python syntax: same first operator set as the Perl tables.  The second
   set enables (?...) groups, Perl-style options, \p{...} with '^' negation,
   possessive repeats, \v, \uHHHH, (?(cond)...) and (?P<name>...) groups.
   Behavior adds ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV; default options are
   SINGLELINE | ASCII_RANGE.  Initializer order is assumed to be op, op2,
   behavior, options, meta-char table -- confirm against OnigSyntaxType. */
OnigSyntaxType OnigSyntaxPython = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_ESC_V_VTAB |
ONIG_SYN_OP2_ESC_U_HEX4 |
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_ASCII_RANGE )
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/*
 * onig_set_default_syntax - install `syntax' as the process-wide default.
 * A NULL argument selects ONIG_SYNTAX_RUBY.  Always returns 0.
 */
extern int
onig_set_default_syntax(OnigSyntaxType* syntax)
{
  if (IS_NULL(syntax)) {
    OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
  }
  else {
    OnigDefaultSyntax = syntax;
  }
  return 0;
}
/* Copy the whole syntax definition `*from' into `*to' by struct
   assignment. */
extern void
onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from)
{
*to = *from;
}
/* Replace (not OR into) the syntax's first operator bit set. */
extern void
onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
{
syntax->op = op;
}
/* Replace (not OR into) the syntax's second operator bit set. */
extern void
onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
{
syntax->op2 = op2;
}
/* Replace (not OR into) the syntax's behavior bit set. */
extern void
onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
{
syntax->behavior = behavior;
}
/* Replace the syntax's default option set (the value regcomp() reads as its
   starting options). */
extern void
onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
{
syntax->options = options;
}
/* Return the syntax's first operator bit set. */
extern unsigned int
onig_get_syntax_op(OnigSyntaxType* syntax)
{
return syntax->op;
}
/* Return the syntax's second operator bit set. */
extern unsigned int
onig_get_syntax_op2(OnigSyntaxType* syntax)
{
return syntax->op2;
}
/* Return the syntax's behavior bit set. */
extern unsigned int
onig_get_syntax_behavior(OnigSyntaxType* syntax)
{
return syntax->behavior;
}
/* Return the syntax's default option set. */
extern OnigOptionType
onig_get_syntax_options(OnigSyntaxType* syntax)
{
return syntax->options;
}
#ifdef USE_VARIABLE_META_CHARS
/*
 * Replace one entry of the syntax's meta-character table.
 *
 *   enc   syntax object whose meta_char_table is modified
 *   what  one of the ONIG_META_CHAR_* selectors
 *   code  code point to store in the selected entry
 *
 * Returns 0 on success, or ONIGERR_INVALID_ARGUMENT when `what` is not a
 * known selector (the table is left untouched in that case).
 */
extern int onig_set_meta_char(OnigSyntaxType* enc,
                              unsigned int what, OnigCodePoint code)
{
  OnigCodePoint* entry;

  /* pick the table entry first, store once at the end */
  switch (what) {
  case ONIG_META_CHAR_ESCAPE:
    entry = &enc->meta_char_table.esc;
    break;
  case ONIG_META_CHAR_ANYCHAR:
    entry = &enc->meta_char_table.anychar;
    break;
  case ONIG_META_CHAR_ANYTIME:
    entry = &enc->meta_char_table.anytime;
    break;
  case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
    entry = &enc->meta_char_table.zero_or_one_time;
    break;
  case ONIG_META_CHAR_ONE_OR_MORE_TIME:
    entry = &enc->meta_char_table.one_or_more_time;
    break;
  case ONIG_META_CHAR_ANYCHAR_ANYTIME:
    entry = &enc->meta_char_table.anychar_anytime;
    break;
  default:
    return ONIGERR_INVALID_ARGUMENT;
  }

  *entry = code;
  return 0;
}
#endif /* USE_VARIABLE_META_CHARS */

78
src/Onigmo/regtrav.c Normal file
View File

@ -0,0 +1,78 @@
/**********************************************************************
regtrav.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#ifdef USE_CAPTURE_HISTORY
/*
 * Recursive depth-first walk over a capture tree.
 *
 *   node           current node; a null node is skipped and yields 0
 *   at             bit set of ONIG_TRAVERSE_CALLBACK_AT_FIRST / _AT_LAST
 *                  selecting whether the callback runs before and/or after
 *                  the node's children
 *   callback_func  invoked as (group, beg, end, level, at-flag, arg)
 *   level          depth passed to the callback (children get level + 1)
 *   arg            opaque pointer forwarded to the callback
 *
 * The walk stops at the first non-zero callback result and returns it;
 * otherwise 0 is returned.
 */
static int
capture_tree_traverse(OnigCaptureTreeNode* node, int at,
                      int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*),
                      int level, void* arg)
{
  int status = 0;
  int child;

  if (!node)
    return 0;

  /* pre-order visit */
  if (at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) {
    status = callback_func(node->group, node->beg, node->end,
                           level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg);
  }

  /* children, left to right, until one of them reports non-zero */
  for (child = 0; status == 0 && child < node->num_childs; child++) {
    status = capture_tree_traverse(node->childs[child], at,
                                   callback_func, level + 1, arg);
  }

  /* post-order visit, only when nothing above asked to stop */
  if (status == 0 && (at & ONIG_TRAVERSE_CALLBACK_AT_LAST)) {
    status = callback_func(node->group, node->beg, node->end,
                           level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg);
  }

  return status;
}
#endif /* USE_CAPTURE_HISTORY */
/*
 * Public entry point: walk the capture tree rooted at region->history_root,
 * starting at level 0, and forward `at`, `callback_func` and `arg` to
 * capture_tree_traverse().  Returns that function's result.
 *
 * When the library is built without USE_CAPTURE_HISTORY nothing is walked
 * and ONIG_NO_SUPPORT_CONFIG is returned.  `region` is not null-checked.
 */
extern int
onig_capture_tree_traverse(OnigRegion* region, int at,
int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*),
void* arg)
{
#ifdef USE_CAPTURE_HISTORY
return capture_tree_traverse(region->history_root, at,
callback_func, 0, arg);
#else
return ONIG_NO_SUPPORT_CONFIG;
#endif
}

58
src/Onigmo/regversion.c Normal file
View File

@ -0,0 +1,58 @@
/**********************************************************************
regversion.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#include "oniguruma.h"
#include <stdio.h>
/*
 * Return the library version formatted as "MAJOR.MINOR.TEENY".
 *
 * The text is written into a function-local static buffer on every call and
 * a pointer to that buffer is returned; the caller must not free it.
 */
extern const char*
onig_version(void)
{
  static char s[12];

  /* Bounded write: the output can never run past the 12-byte buffer, even
   * if the version macros grow to more digits than the buffer was sized
   * for (the text would be truncated instead). */
  snprintf(s, sizeof(s), "%d.%d.%d",
           ONIGURUMA_VERSION_MAJOR,
           ONIGURUMA_VERSION_MINOR,
           ONIGURUMA_VERSION_TEENY);
  return s;
}
/*
 * Return the copyright banner, which embeds the library version.
 *
 * The text is written into a function-local static buffer on every call and
 * a pointer to that buffer is returned; the caller must not free it.
 */
extern const char*
onig_copyright(void)
{
  static char s[80];

  /* Bounded write: keeps the banner inside the 80-byte buffer regardless of
   * how many digits the version macros expand to. */
  snprintf(s, sizeof(s),
           "Onigmo %d.%d.%d : Copyright (C) 2002-2009 K.Kosako, "
           "2011-2013 K.Takata",
           ONIGURUMA_VERSION_MAJOR,
           ONIGURUMA_VERSION_MINOR,
           ONIGURUMA_VERSION_TEENY);
  return s;
}

View File

@ -0,0 +1,25 @@
# Sample programs; noinst_ means they are built but not installed.
noinst_PROGRAMS = encode listcap names posix simple sql syntax crnl
# All samples link against the library in the top build directory.
libname = $(top_builddir)/libonig.la
LDADD = $(libname)
INCLUDES = -I$(top_srcdir) -I$(includedir)
# One source file per program.  crnl has no explicit _SOURCES line here.
encode_SOURCES = encode.c
listcap_SOURCES = listcap.c
names_SOURCES = names.c
posix_SOURCES = posix.c
simple_SOURCES = simple.c
sql_SOURCES = sql.c
syntax_SOURCES = syntax.c
sampledir = $(top_builddir)/sample
# "make test" runs the samples one after another.
# crnl is built by noinst_PROGRAMS but is not run by this target.
test: encode listcap names posix simple sql syntax
@$(sampledir)/encode
@$(sampledir)/listcap
@$(sampledir)/names
@$(sampledir)/posix
@$(sampledir)/simple
@$(sampledir)/sql
@$(sampledir)/syntax

View File

@ -0,0 +1,553 @@
# Makefile.in generated by automake 1.11.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
# Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
noinst_PROGRAMS = encode$(EXEEXT) listcap$(EXEEXT) names$(EXEEXT) \
posix$(EXEEXT) simple$(EXEEXT) sql$(EXEEXT) syntax$(EXEEXT) \
crnl$(EXEEXT)
subdir = sample
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.in
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
PROGRAMS = $(noinst_PROGRAMS)
crnl_SOURCES = crnl.c
crnl_OBJECTS = crnl.$(OBJEXT)
crnl_LDADD = $(LDADD)
crnl_DEPENDENCIES = $(libname)
am_encode_OBJECTS = encode.$(OBJEXT)
encode_OBJECTS = $(am_encode_OBJECTS)
encode_LDADD = $(LDADD)
encode_DEPENDENCIES = $(libname)
am_listcap_OBJECTS = listcap.$(OBJEXT)
listcap_OBJECTS = $(am_listcap_OBJECTS)
listcap_LDADD = $(LDADD)
listcap_DEPENDENCIES = $(libname)
am_names_OBJECTS = names.$(OBJEXT)
names_OBJECTS = $(am_names_OBJECTS)
names_LDADD = $(LDADD)
names_DEPENDENCIES = $(libname)
am_posix_OBJECTS = posix.$(OBJEXT)
posix_OBJECTS = $(am_posix_OBJECTS)
posix_LDADD = $(LDADD)
posix_DEPENDENCIES = $(libname)
am_simple_OBJECTS = simple.$(OBJEXT)
simple_OBJECTS = $(am_simple_OBJECTS)
simple_LDADD = $(LDADD)
simple_DEPENDENCIES = $(libname)
am_sql_OBJECTS = sql.$(OBJEXT)
sql_OBJECTS = $(am_sql_OBJECTS)
sql_LDADD = $(LDADD)
sql_DEPENDENCIES = $(libname)
am_syntax_OBJECTS = syntax.$(OBJEXT)
syntax_OBJECTS = $(am_syntax_OBJECTS)
syntax_LDADD = $(LDADD)
syntax_DEPENDENCIES = $(libname)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = crnl.c $(encode_SOURCES) $(listcap_SOURCES) $(names_SOURCES) \
$(posix_SOURCES) $(simple_SOURCES) $(sql_SOURCES) \
$(syntax_SOURCES)
DIST_SOURCES = crnl.c $(encode_SOURCES) $(listcap_SOURCES) \
$(names_SOURCES) $(posix_SOURCES) $(simple_SOURCES) \
$(sql_SOURCES) $(syntax_SOURCES)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
AMTAR = @AMTAR@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LTVERSION = @LTVERSION@
MAKEINFO = @MAKEINFO@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
RUBYDIR = @RUBYDIR@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STATISTICS = @STATISTICS@
STRIP = @STRIP@
VERSION = @VERSION@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
lt_ECHO = @lt_ECHO@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
libname = $(top_builddir)/libonig.la
LDADD = $(libname)
INCLUDES = -I$(top_srcdir) -I$(includedir)
encode_SOURCES = encode.c
listcap_SOURCES = listcap.c
names_SOURCES = names.c
posix_SOURCES = posix.c
simple_SOURCES = simple.c
sql_SOURCES = sql.c
syntax_SOURCES = syntax.c
sampledir = $(top_builddir)/sample
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sample/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign sample/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
clean-noinstPROGRAMS:
@list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
echo " rm -f" $$list; \
rm -f $$list || exit $$?; \
test -n "$(EXEEXT)" || exit 0; \
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
crnl$(EXEEXT): $(crnl_OBJECTS) $(crnl_DEPENDENCIES)
@rm -f crnl$(EXEEXT)
$(LINK) $(crnl_OBJECTS) $(crnl_LDADD) $(LIBS)
encode$(EXEEXT): $(encode_OBJECTS) $(encode_DEPENDENCIES)
@rm -f encode$(EXEEXT)
$(LINK) $(encode_OBJECTS) $(encode_LDADD) $(LIBS)
listcap$(EXEEXT): $(listcap_OBJECTS) $(listcap_DEPENDENCIES)
@rm -f listcap$(EXEEXT)
$(LINK) $(listcap_OBJECTS) $(listcap_LDADD) $(LIBS)
names$(EXEEXT): $(names_OBJECTS) $(names_DEPENDENCIES)
@rm -f names$(EXEEXT)
$(LINK) $(names_OBJECTS) $(names_LDADD) $(LIBS)
posix$(EXEEXT): $(posix_OBJECTS) $(posix_DEPENDENCIES)
@rm -f posix$(EXEEXT)
$(LINK) $(posix_OBJECTS) $(posix_LDADD) $(LIBS)
simple$(EXEEXT): $(simple_OBJECTS) $(simple_DEPENDENCIES)
@rm -f simple$(EXEEXT)
$(LINK) $(simple_OBJECTS) $(simple_LDADD) $(LIBS)
sql$(EXEEXT): $(sql_OBJECTS) $(sql_DEPENDENCIES)
@rm -f sql$(EXEEXT)
$(LINK) $(sql_OBJECTS) $(sql_LDADD) $(LIBS)
syntax$(EXEEXT): $(syntax_OBJECTS) $(syntax_DEPENDENCIES)
@rm -f syntax$(EXEEXT)
$(LINK) $(syntax_OBJECTS) $(syntax_LDADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crnl.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encode.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/listcap.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/names.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/posix.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/simple.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sql.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/syntax.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
.c.obj:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
set x; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(PROGRAMS)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \
mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstPROGRAMS ctags distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags uninstall uninstall-am
test: encode listcap names posix simple sql syntax
@$(sampledir)/encode
@$(sampledir)/listcap
@$(sampledir)/names
@$(sampledir)/posix
@$(sampledir)/simple
@$(sampledir)/sql
@$(sampledir)/syntax
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

221
src/Onigmo/sample/crnl.c Normal file
View File

@ -0,0 +1,221 @@
/*
* crnl.c 2007/05/30 K.Kosako
*
* !!! You should enable USE_CRNL_AS_LINE_TERMINATOR. !!!
*
* USE_CRNL_AS_LINE_TERMINATOR config test program.
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */
/* Number of test cases whose reported span differed from the expected one. */
static int nfail = 0;

/*
 * Print the outcome of test case `no` to stderr.
 * The case succeeds when (from, to) equals (expected_from, expected_to);
 * otherwise both spans are printed and nfail is incremented.
 */
static void result(int no, int from, int to,
                   int expected_from, int expected_to)
{
  const int same_span = (from == expected_from) && (to == expected_to);

  fprintf(stderr, "%3d: ", no);

  if (same_span) {
    fprintf(stderr, "Success\n");
    return;
  }

  fprintf(stderr, "Fail: expected: (%d-%d), result: (%d-%d)\n",
          expected_from, expected_to, from, to);
  nfail++;
}
/*
 * Run one test case: compile `pattern_arg` (UTF-8, default syntax,
 * ONIG_OPTION_NEWLINE_CRLF), search `str_arg`, and report via result()
 * whether the span of the whole match equals (expected_from, expected_to).
 *
 *   no             test case number, used only for reporting
 *   start_offset   where the search starts: added to the string head for a
 *                  forward search, added to the string end for a backward
 *                  search (so it is zero or negative there)
 *   backward       non-zero searches from the end towards the head
 *
 * On ONIG_MISMATCH the first region slot is still handed to result(); the
 * "must fail" callers pass -1/-1 as the expected span.
 * NOTE(review): this relies on onig_search() leaving beg[0]/end[0] at -1
 * when nothing matched - confirm against the library.
 *
 * Returns 0 when the search ran (match or mismatch), -1 when compiling or
 * searching reported an error.  The region and the compiled regex are
 * released on every path after a successful compile.
 */
static int
x0(int no, char* pattern_arg, char* str_arg,
   int start_offset, int expected_from, int expected_to, int backward)
{
  int r;
  int status = 0;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;
  UChar *pattern, *str;

  pattern = (UChar* )pattern_arg;
  str = (UChar* )str_arg;

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
               ONIG_OPTION_NEWLINE_CRLF, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  region = onig_region_new();

  end = str + strlen((char* )str);
  if (backward) {
    start = end + start_offset;
    range = str;
  }
  else {
    start = str + start_offset;
    range = end;
  }

  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0 || r == ONIG_MISMATCH) {
    /* match and mismatch are both judged by the first region slot */
    result(no, region->beg[0], region->end[0], expected_from, expected_to);
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    status = -1;   /* fall through to the common cleanup below */
  }

  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  return status;
}
/*
 * Forward search from the start of the string (start_offset 0, backward 0)
 * that is expected to produce the span (expected_from, expected_to).
 * Returns x0()'s result.
 */
static int
x(int no, char* pattern_arg, char* str_arg,
int expected_from, int expected_to)
{
return x0(no, pattern_arg, str_arg, 0, expected_from, expected_to, 0);
}
/*
 * Test case that is expected to fail: calls x0() with the given start
 * offset and direction and -1/-1 as the expected span.
 * Returns x0()'s result.
 */
static int
f0(int no, char* pattern_arg, char* str_arg, int start_offset, int backward)
{
return x0(no, pattern_arg, str_arg, start_offset, -1, -1, backward);
}
/*
 * Test case that is expected to fail for a forward search from the start
 * of the string: calls x() with -1/-1 as the expected span.
 */
static int
f(int no, char* pattern_arg, char* str_arg)
{
return x(no, pattern_arg, str_arg, -1, -1);
}
/*
 * Run the numbered test cases in order.  Each x()/x0() call states the
 * expected match span, each f()/f0() call expects no match.  Several cases
 * have two variants selected by USE_UNICODE_ALL_LINE_TERMINATORS.
 *
 * The return values of the individual cases are ignored.  If any case
 * reported a failure (nfail > 0) a configuration hint is printed.
 * The process always exits with status 0; argc and argv are unused.
 */
extern int main(int argc, char* argv[])
{
/* forward searches starting at the head of the string */
x( 1, "", "\r\n", 0, 0);
/* x( 2, ".", "\r\n", 0, 1); */
f( 2, ".", "\r\n");
f( 3, "..", "\r\n");
x( 4, "^", "\r\n", 0, 0);
x( 5, "\\n^", "\r\nf", 1, 2);
x( 6, "\\n^a", "\r\na", 1, 3);
x( 7, "$", "\r\n", 0, 0);
x( 8, "T$", "T\r\n", 0, 1);
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
x( 9, "T$", "T\raT\r\n", 0, 1);
#else
x( 9, "T$", "T\raT\r\n", 3, 4);
#endif
x(10, "\\z", "\r\n", 2, 2);
f(11, "a\\z", "a\r\n");
x(12, "\\Z", "\r\n", 0, 0);
x(13, "\\Z", "\r\na", 3, 3);
x(14, "\\Z", "\r\n\r\n\n", 4, 4);
x(15, "\\Z", "\r\n\r\nX", 5, 5);
x(16, "a\\Z", "a\r\n", 0, 1);
x(17, "aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15);
x(18, "a|$", "b\r\n", 1, 1);
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
x(19, "$|b", "\rb", 0, 0);
#else
x(19, "$|b", "\rb", 1, 2);
#endif
x(20, "a$|ab$", "\r\nab\r\n", 2, 4);
x(21, "a|\\Z", "b\r\n", 1, 1);
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
x(22, "\\Z|b", "\rb", 0, 0);
#else
x(22, "\\Z|b", "\rb", 1, 2);
#endif
x(23, "a\\Z|ab\\Z", "\r\nab\r\n", 2, 4);
x(24, "(?=a$).", "a\r\n", 0, 1);
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
x(25, "(?=a$).", "a\r", 0, 1);
f(26, "(?!a$)..", "a\r");
#else
f(25, "(?=a$).", "a\r");
x(26, "(?!a$)..", "a\r", 0, 2);
#endif
/* x(27, "(?<=a$).\\n", "a\r\n", 1, 3); */
x(27, "(?<=a$)\\r\\n", "a\r\n", 1, 3);
/* f(28, "(?<!a$).\\n", "a\r\n"); */
f(28, "(?<!a$)\\r\\n", "a\r\n");
x(29, "(?=a\\Z).", "a\r\n", 0, 1);
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
x(30, "(?=a\\Z).", "a\r", 0, 1);
f(31, "(?!a\\Z)..", "a\r");
#else
f(30, "(?=a\\Z).", "a\r");
x(31, "(?!a\\Z)..", "a\r", 0, 2);
#endif
x(32, ".*$", "aa\r\n", 0, 2);
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
x(33, ".*$", "aa\r", 0, 2);
#else
x(33, ".*$", "aa\r", 0, 3);
#endif
x(34, "\\R{3}", "\r\r\n\n", 0, 4);
x(35, "$", "\n", 0, 0);
x(36, "T$", "T\n", 0, 1);
x(37, "(?m).", "\r\n", 0, 1);
x(38, "(?m)..", "\r\n", 0, 2);
/* forward searches with an explicit start offset (4th argument) */
x0(39, "^", "\n.", 1, 1, 1, 0);
x0(40, "^", "\r\n.", 1, 2, 2, 0);
x0(41, "^", "\r\n.", 2, 2, 2, 0);
x0(42, "$", "\n\n", 1, 1, 1, 0);
x0(43, "$", "\r\n\n", 1, 2, 2, 0);
x0(44, "$", "\r\n\n", 2, 2, 2, 0);
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
x0(45, "^$", "\n\r", 1, 1, 1, 0);
#else
f0(45, "^$", "\n\r", 1, 0);
#endif
x0(46, "^$", "\n\r\n", 1, 1, 1, 0);
x0(47, "^$", "\r\n\n", 1, 2, 2, 0);
x0(48, "\\Z", "\r\n\n", 1, 2, 2, 0);
f0(49, ".(?=\\Z)", "\r\n", 1, 0);
x0(50, "(?=\\Z)", "\r\n", 1, 2, 2, 0);
x0(51, "(?<=^).", "\r\n.", 0, 2, 3, 0);
x0(52, "(?<=^).", "\r\n.", 1, 2, 3, 0);
x0(53, "(?<=^).", "\r\n.", 2, 2, 3, 0);
x0(54, "^a", "\r\na", 0, 2, 3, 0);
x0(55, "^a", "\r\na", 1, 2, 3, 0);
x0(56, "(?m)$.{1,2}a", "\r\na", 0, 0, 3, 0);
f0(57, "(?m)$.{1,2}a", "\r\na", 1, 0);
x0(58, ".*b", "\r\naaab\r\n", 1, 2, 6, 0);
/* backward search */
/* x0(59, "$", "\n\n", 0, 1, 1, 1); */ /* BUG? */
x0(60, "$", "\n\n", -1, 1, 1, 1);
x0(61, "$", "\n\r\n", -1, 1, 1, 1);
x0(62, "$", "\n\r\n", -2, 1, 1, 1);
x0(63, "^$", "\n\r\n", -1, 1, 1, 1);
x0(64, "^$", "\n\r\n", 0, 1, 1, 1);
x0(65, "^$", "\r\n\n", 0, 2, 2, 1);
x0(66, "^a", "\r\na", 0, 2, 3, 1);
x0(67, "^a", "\r\na", -1, 2, 3, 1);
f0(68, "^a", "\r\na", -2, 1);
onig_end();
/* any failure most likely means the CRNL option is disabled in the build */
if (nfail > 0) {
fprintf(stderr, "\n");
fprintf(stderr, "!!! You have to enable USE_CRNL_AS_LINE_TERMINATOR\n");
fprintf(stderr, "!!! in regenc.h for this test program.\n");
fprintf(stderr, "\n");
}
return 0;
}

298
src/Onigmo/sample/encode.c Normal file
View File

@ -0,0 +1,298 @@
/*
* encode.c
*/
#include <stdio.h>
#include "oniguruma.h"
/*
 * Search the byte range [str, end) with the compiled regex `reg` and print
 * the outcome to stderr: every capture span on a match, "search fail" on
 * ONIG_MISMATCH, or the library's error text otherwise.  The encoding name
 * of `reg` is included in each message.
 *
 * Returns 0 on match or mismatch and -1 on a search error.  The region
 * allocated here is released on every path.
 */
static int
search(regex_t* reg, unsigned char* str, unsigned char* end)
{
  int r;
  int status = 0;
  unsigned char *start, *range;
  OnigRegion *region;

  region = onig_region_new();

  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;

    fprintf(stderr, "match at %d  (%s)\n", r,
            ONIGENC_NAME(onig_get_encoding(reg)));
    for (i = 0; i < region->num_regs; i++) {
      /* The positions are OnigPosition values; cast so the arguments really
       * are `long`, as %ld requires, whatever width OnigPosition has. */
      fprintf(stderr, "%d: (%ld-%ld)\n", i,
              (long )region->beg[i], (long )region->end[i]);
    }
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail (%s)\n",
            ONIGENC_NAME(onig_get_encoding(reg)));
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    fprintf(stderr, "  (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
    status = -1;   /* still release the region below */
  }

  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  return status;
}
/*
 * Compile `apattern` with encoding `enc` and `options` (default syntax),
 * run search() over `astr`, then free the regex and call onig_end().
 * Both strings are measured with onigenc_str_bytelen_null() for `enc`.
 *
 * Returns -1 when compilation fails (the error text is printed to stderr)
 * or when search() reports an error; 0 otherwise.
 */
static int
exec(OnigEncoding enc, OnigOptionType options,
     char* apattern, char* astr)
{
  int r;
  unsigned char *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  UChar* pattern = (UChar* )apattern;
  UChar* str     = (UChar* )astr;

  r = onig_new(&reg, pattern,
               pattern + onigenc_str_bytelen_null(enc, pattern),
               options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  end = str + onigenc_str_bytelen_null(enc, str);
  r = search(reg, str, end);   /* 0, or -1 on a search error */

  onig_free(reg);
  onig_end();
  return r;   /* propagate the search status instead of discarding it */
}
/* Case-fold flag that exec_deluxe() stores in OnigCompileInfo.case_fold_flag. */
static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;
/* Setter for CF; compiled out (#if 0), so CF keeps its initial value. */
#if 0
static void
set_case_fold(OnigCaseFoldType cf)
{
CF = cf;
}
#endif
/*
 * Like exec(), but compiles through onig_new_deluxe() so the pattern and
 * the target string may use different encodings.
 *
 *   pattern_enc  encoding of `apattern`
 *   str_enc      encoding of `astr`
 *   options      compile options
 *
 * The compile info uses the default syntax and the file-level case-fold
 * flag CF.  Returns -1 when compilation fails (the error text is printed to
 * stderr) or when search() reports an error; 0 otherwise.
 */
static int
exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
            OnigOptionType options, char* apattern, char* astr)
{
  int r;
  unsigned char *end;
  regex_t* reg;
  OnigCompileInfo ci;
  OnigErrorInfo einfo;
  UChar* pattern = (UChar* )apattern;
  UChar* str     = (UChar* )astr;

  ci.num_of_elements = 5;
  ci.pattern_enc = pattern_enc;
  ci.target_enc  = str_enc;
  ci.syntax      = ONIG_SYNTAX_DEFAULT;
  ci.option      = options;
  ci.case_fold_flag = CF;

  r = onig_new_deluxe(&reg, pattern,
                      pattern + onigenc_str_bytelen_null(pattern_enc, pattern),
                      &ci, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  end = str + onigenc_str_bytelen_null(str_enc, str);
  r = search(reg, str, end);   /* 0, or -1 on a search error */

  onig_free(reg);
  onig_end();
  return r;   /* propagate the search status instead of discarding it */
}
/*
 * Driver: feeds each pattern/subject pair below to exec() (one encoding for
 * both strings) or exec_deluxe() (separate pattern and subject encodings).
 * Every result is stored in `r` but never inspected, and the function always
 * returns 0. argc/argv are unused.
 */
extern int main(int argc, char* argv[])
{
int r;
/* High-bit single-byte test data. These arrays are only passed to the
 * ISO 8859-15 and ISO 8859-16 calls below (the original label here read
 * "ISO 8859-1 test"). */
static unsigned char str[] = { 0xc7, 0xd6, 0xfe, 0xea, 0xe0, 0xe2, 0x00 };
static unsigned char pattern[] = { 0xe7, 0xf6, 0xde, '\\', 'w', '+', 0x00 };
/* --- single-byte encodings, case-insensitive matching --- */
r = exec(ONIG_ENCODING_CP1251, ONIG_OPTION_IGNORECASE,
"aBc", " AbC");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
" [a-c\337z] ", " SS ");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
" [\330-\341] ", " SS ");
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
"\337 ", " Ss ");
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
"SS ", " \337 ");
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
"\\A\\S\\z", "ss");
/* the same character-class test repeated for each ISO 8859-x encoding */
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_3, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_4, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_5, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_6, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_7, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_8, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_9, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_10, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_11, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_13, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_14, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
/* the byte arrays declared at the top of the function */
r = exec(ONIG_ENCODING_ISO_8859_15, ONIG_OPTION_IGNORECASE,
(char* )pattern, (char* )str);
r = exec(ONIG_ENCODING_ISO_8859_16, ONIG_OPTION_IGNORECASE,
(char* )pattern, (char* )str);
/* --- other encodings, ASCII-only data, no options --- */
r = exec(ONIG_ENCODING_KOI8_R, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
r = exec(ONIG_ENCODING_EUC_TW, ONIG_OPTION_NONE, "b*a+?c+", "bbbaaaccc");
r = exec(ONIG_ENCODING_EUC_KR, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
r = exec(ONIG_ENCODING_EUC_CN, ONIG_OPTION_NONE, "c+", "bbbaaaccc");
r = exec(ONIG_ENCODING_BIG5, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
/* --- ISO 8859-1: byte 0xDF (\337) against "SS"/"ss", case-insensitive --- */
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"\337", "SS");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"SS", "\337");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"SSb\337ssc", "a\337bSS\337cd");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"[a\337]{0,2}", "aSS");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"is", "iss");
/* --- pattern and subject in different encodings; wide subjects are
 * spelled out byte by byte with explicit \000 bytes --- */
r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_NONE, "a+",
"\000b\000a\000a\000a\000c\000c\000\000");
r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_LE,
ONIG_OPTION_NONE, "a+",
"b\000a\000a\000a\000a\000c\000\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_LE,
ONIG_OPTION_NONE,
"\000b\000a\000a\000a\000c\000c\000\000",
"x\000b\000a\000a\000a\000c\000c\000\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\337", "\000S\000S\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"SS", "\000\337\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_LE,
ONIG_OPTION_IGNORECASE,
"\337", "S\000S\000\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_BE,
ONIG_OPTION_IGNORECASE,
"SS", "\000\000\000\337\000\000\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_LE,
ONIG_OPTION_IGNORECASE,
"\337", "S\000\000\000S\000\000\000\000\000\000\000");
/* --- UTF-16BE pattern and subject through exec() --- */
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
"\000[\000[\000:\000a\000l\000n\000u\000m\000:\000]\000]\000+\000\000",
"\000#\002\120\000a\000Z\012\077\012\076\012\075\000\000");
/* 0x0a3d == \012\075 : is not alnum */
/* 0x0a3e == \012\076 : is alnum */
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
"\000\\\000d\000+\000\000",
"\0003\0001\377\020\377\031\377\032\000\000");
r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE,
"(Aa\\d)+", "BaA5Aa0234");
/* --- property classes and case folding with UTF-16BE subjects --- */
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_NONE,
"^\\P{Hiragana}\\p{^Hiragana}(\\p{Hiragana}+)$",
"\060\100\060\240\060\101\060\102\060\226\060\237\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000[\000\337\000]\000\000", "\000S\000S\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000[\000\337\000]\000\000", "\000s\000S\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000^\000[\000\001\000-\377\375\000]\000$\000\000",
"\000s\000S\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000S\000S\000\000",
"\000S\000T\000\337\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000S\000T\000S\000S\000\000",
"\000S\000t\000s\000S\000\000");
{
/* UTF-16BE code units given as byte pairs: pat is 0x1ffc; str1 is
 * 0x2126 0x1fbe; str2 is 0x1ff3; each is followed by a 0x0000 unit */
UChar pat[] = { 0x1f, 0xfc, 0x00, 0x00 };
UChar str1[] = { 0x21, 0x26, 0x1f, 0xbe, 0x00, 0x00 };
UChar str2[] = { 0x1f, 0xf3, 0x00, 0x00 };
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
(char* )pat, (char* )str1);
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
(char* )pat, (char* )str2);
}
#if 0
/* You should define USE_UNICODE_CASE_FOLD_TURKISH_AZERI in regenc.h. */
/* Disabled block: switches the case-fold flag through set_case_fold()
 * (itself compiled out) and restores ONIGENC_CASE_FOLD_MIN afterwards. */
set_case_fold(ONIGENC_CASE_FOLD_TURKISH_AZERI);
r = exec_deluxe(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8,
ONIG_OPTION_IGNORECASE,
"Ii", "\304\261\304\260");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000I\000i\000\000", "\001\061\001\060\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\001\061\001\060\000\000", "\000I\000i\000\000");
set_case_fold(ONIGENC_CASE_FOLD_MIN);
#endif
return 0;
}

108
src/Onigmo/sample/listcap.c Normal file
View File

@ -0,0 +1,108 @@
/*
* listcap.c
*
* capture history (?@...) sample.
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
/*
 * Capture-tree traversal callback: prints one node as
 * "<group>: (<beg>-<end>)" on stderr, indented two spaces per `level`.
 *
 * Only the ONIG_TRAVERSE_CALLBACK_AT_FIRST visit is accepted; any other
 * value of `at` makes the callback return -1. `arg` is unused.
 * Returns 0 on success.
 */
static int
node_callback(int group, OnigPosition beg, OnigPosition end, int level,
              int at, void* arg)
{
  int i;

  if (at != ONIG_TRAVERSE_CALLBACK_AT_FIRST)
    return -1; /* error */

  /* indent */
  for (i = 0; i < level * 2; i++)
    fputc(' ', stderr);

  /* cast so the arguments match %ld whatever integer type OnigPosition is */
  fprintf(stderr, "%d: (%ld-%ld)\n", group, (long )beg, (long )end);
  return 0;
}
/*
 * Compile `pattern` (ASCII encoding, default options) with `syntax`, search
 * `str` with it, and print the result on stderr: the capture counts, the
 * match position, every region entry, and the capture-history tree as
 * rendered by node_callback().
 *
 * Returns -1 if compilation or the search reports an error (the error text
 * is written to stderr), 0 on a match or a plain mismatch. The region and
 * the regex are released on every path that created them.
 */
extern int ex(unsigned char* str, unsigned char* pattern,
              OnigSyntaxType* syntax)
{
  int r;
  int status = 0;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
               ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  fprintf(stderr, "number of captures: %d\n", onig_number_of_captures(reg));
  fprintf(stderr, "number of capture histories: %d\n",
          onig_number_of_capture_histories(reg));

  region = onig_region_new();

  /* search the whole subject, front to back */
  end = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;

    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      /* cast so the arguments match %ld whatever type the offsets use */
      fprintf(stderr, "%d: (%ld-%ld)\n", i,
              (long )region->beg[i], (long )region->end[i]);
    }
    fprintf(stderr, "\n");

    r = onig_capture_tree_traverse(region, ONIG_TRAVERSE_CALLBACK_AT_FIRST,
                                   node_callback, (void* )0);
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    /* the message used to be built and then dropped; report it */
    fprintf(stderr, "ERROR: %s\n", s);
    status = -1;
  }

  /* shared cleanup, also reached on the error path */
  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  return status;
}
/*
 * Runs ex() on three subject/pattern pairs using a copy of the default
 * syntax with the capture-history operator switched on, then calls
 * onig_end(). The results of ex() are not inspected; always returns 0.
 */
extern int main(int argc, char* argv[])
{
  /* { subject, pattern } pairs, executed in table order */
  static UChar* cases[][2] = {
    { (UChar* )"((())())",
      (UChar* )"\\g<p>(?@<p>\\(\\g<s>\\)){0}(?@<s>(?:\\g<p>)*|){0}" },
    { (UChar* )"x00x00x00",
      (UChar* )"(?@x(?@\\d+))+" },
    { (UChar* )"0123",
      (UChar* )"(?@.)(?@.)(?@.)(?@.)" },
  };
  OnigSyntaxType history_syntax;
  size_t k;

  /* enable capture history */
  onig_copy_syntax(&history_syntax, ONIG_SYNTAX_DEFAULT);
  onig_set_syntax_op2(&history_syntax,
                      onig_get_syntax_op2(&history_syntax)
                        | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY);

  for (k = 0; k < sizeof(cases) / sizeof(cases[0]); k++) {
    (void )ex(cases[k][0], cases[k][1], &history_syntax);
  }

  onig_end();
  return 0;
}

72
src/Onigmo/sample/names.c Normal file
View File

@ -0,0 +1,72 @@
/*
* names.c -- example of group name callback.
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
/*
 * onig_foreach_name() callback: for every group number registered under
 * `name`, prints "<name> (<group>): (<beg>-<end>) <mark>" on stderr, where
 * the offsets come from the OnigRegion passed through `arg` and <mark> is
 * "*" when the group equals the number returned by
 * onig_name_to_backref_number() for this name.
 *
 * Always returns 0 so that iteration continues.
 */
static int
name_callback(const UChar* name, const UChar* name_end,
              int ngroup_num, int* group_nums,
              regex_t* reg, void* arg)
{
  int i, gn, ref;
  char* s;
  OnigRegion *region = (OnigRegion* )arg;

  for (i = 0; i < ngroup_num; i++) {
    gn = group_nums[i];
    ref = onig_name_to_backref_number(reg, name, name_end, region);
    s = (ref == gn ? "*" : "");

    /* print exactly [name, name_end) rather than relying on a terminator */
    fprintf(stderr, "%.*s (%d): ",
            (int )(name_end - name), (const char* )name, gn);
    /* cast so the arguments match %ld whatever type the offsets use */
    fprintf(stderr, "(%ld-%ld) %s\n",
            (long )region->beg[gn], (long )region->end[gn], s);
  }
  return 0; /* 0: continue */
}
/*
 * Compiles a pattern with named groups (the name "foo" is used twice),
 * searches a fixed subject, and on a match lists every name through
 * name_callback().
 *
 * Returns -1 if compilation or the search reports an error (the error text
 * is written to stderr), 0 otherwise. The region, the regex and the library
 * state are released on every path after a successful compile.
 */
extern int main(int argc, char* argv[])
{
  int r;
  int status = 0;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;
  static UChar* pattern = (UChar* )"(?<foo>a*)(?<bar>b*)(?<foo>c*)";
  static UChar* str = (UChar* )"aaabbbbcc";

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
               ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT,
               &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg));

  region = onig_region_new();

  /* search the whole subject, front to back */
  end = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    fprintf(stderr, "match at %d\n\n", r);
    r = onig_foreach_name(reg, name_callback, (void* )region);
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    /* the message used to be built and then dropped; report it */
    fprintf(stderr, "ERROR: %s\n", s);
    status = -1;
  }

  /* shared cleanup, also reached on the error path */
  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  onig_end();
  return status;
}

93
src/Onigmo/sample/posix.c Normal file
View File

@ -0,0 +1,93 @@
/*
* posix.c
*/
#include <stdio.h>
#include "onigposix.h"
typedef unsigned char UChar;
/*
 * Match `str` against the already compiled `reg` and report the outcome on
 * stderr: "OK" followed by the match offsets, or "FAIL" on REG_NOMATCH.
 * `pattern` is used for display only.
 *
 * At most 20 match slots (the size of the local buffer) are requested and
 * printed, even if the pattern has more sub-expressions.
 *
 * Returns -1 when regexec() reports an error other than REG_NOMATCH (the
 * regerror() text is written to stderr), 0 otherwise.
 */
static int x(regex_t* reg, unsigned char* pattern, unsigned char* str)
{
  int r, i;
  char buf[200];
  regmatch_t pmatch[20];
  const size_t max_match = sizeof(pmatch) / sizeof(pmatch[0]);
  size_t nmatch = reg->re_nsub + 1;

  /* never ask regexec() to fill more slots than pmatch[] holds */
  if (nmatch > max_match)
    nmatch = max_match;

  r = regexec(reg, (char* )str, nmatch, pmatch, 0);
  if (r != 0 && r != REG_NOMATCH) {
    regerror(r, reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    return -1;
  }

  if (r == REG_NOMATCH) {
    fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str);
  }
  else {
    fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str);
    /* only the slots that were actually requested are valid */
    for (i = 0; i < (int )nmatch; i++) {
      fprintf(stderr, "%d: %d-%d\n", i, pmatch[i].rm_so, pmatch[i].rm_eo);
    }
  }
  return 0;
}
/*
 * Compile `pattern` with `cflags`, match it against `str` through x(), and
 * free the compiled regex again.
 * Returns -1 (after printing the regerror() text on stderr) if regcomp()
 * fails, 0 otherwise.
 */
static int run_case(char* pattern, int cflags, char* str)
{
  int r;
  char buf[200];
  regex_t reg;

  r = regcomp(&reg, pattern, cflags);
  if (r) {
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    return -1;
  }

  x(&reg, (UChar* )pattern, (UChar* )str);

  /* each regcomp() gets its own regfree(), so no compiled pattern leaks */
  regfree(&reg);
  return 0;
}

/*
 * Exercises the POSIX wrapper with five pattern/subject pairs: extended and
 * basic flags under the default syntax, then extended patterns after the
 * default syntax is switched to ONIG_SYNTAX_POSIX_EXTENDED.
 * Stops and returns -1 at the first pattern that fails to compile;
 * returns 0 otherwise.
 */
extern int main(int argc, char* argv[])
{
  /* default syntax (ONIG_SYNTAX_RUBY) */
  if (run_case("^a+b{2,7}[c-f]?$|uuu", REG_EXTENDED, "aaabbbbd") != 0)
    return -1;

  /* POSIX Basic RE (REG_EXTENDED is not specified.) */
  if (run_case("^a+b{2,7}[c-f]?|uuu", 0, "a+b{2,7}d?|uuu") != 0)
    return -1;

  /* POSIX Basic RE (REG_EXTENDED is not specified.) */
  if (run_case("^a*b\\{2,7\\}\\([c-f]\\)$", 0, "aaaabbbbbbd") != 0)
    return -1;

  /* POSIX Extended RE */
  onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED);
  if (run_case("^a+b{2,7}[c-f]?)$|uuu", REG_EXTENDED, "aaabbbbd)") != 0)
    return -1;

  if (run_case("^b.", REG_EXTENDED | REG_NEWLINE, "a\nb\n") != 0)
    return -1;

  return 0;
}

View File

@ -0,0 +1,56 @@
/*
* simple.c
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
/*
 * Minimal search example: compiles a fixed ASCII pattern, searches a fixed
 * subject, and prints the match position and every region entry on stderr.
 *
 * Returns -1 if compilation or the search reports an error (the error text
 * is written to stderr), 0 on a match or a plain mismatch. The region, the
 * regex and the library state are released on every path after a successful
 * compile.
 */
extern int main(int argc, char* argv[])
{
  int r;
  int status = 0;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;
  static UChar* pattern = (UChar* )"a(.*)b|[e-f]+";
  static UChar* str = (UChar* )"zzzzaffffffffb";

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
               ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT,
               &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  region = onig_region_new();

  /* search the whole subject, front to back */
  end = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;

    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      /* cast so the arguments match %ld whatever type the offsets use */
      fprintf(stderr, "%d: (%ld-%ld)\n", i,
              (long )region->beg[i], (long )region->end[i]);
    }
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    status = -1;
  }

  /* shared cleanup, also reached on the error path */
  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  onig_end();
  return status;
}

73
src/Onigmo/sample/sql.c Normal file
View File

@ -0,0 +1,73 @@
/*
* sql.c
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
extern int main(int argc, char* argv[])
{
static OnigSyntaxType SQLSyntax;
int r;
unsigned char *start, *range, *end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
static UChar* pattern = (UChar* )"\\_%\\\\__zz";
static UChar* str = (UChar* )"a_abcabcabc\\ppzz";
onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS);
onig_set_syntax_op2 (&SQLSyntax, 0);
onig_set_syntax_behavior(&SQLSyntax, 0);
onig_set_syntax_options (&SQLSyntax, ONIG_OPTION_MULTILINE);
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ESCAPE, (OnigCodePoint )'\\');
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR, (OnigCodePoint )'_');
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYTIME,
ONIG_INEFFECTIVE_META_CHAR);
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ZERO_OR_ONE_TIME,
ONIG_INEFFECTIVE_META_CHAR);
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ONE_OR_MORE_TIME,
ONIG_INEFFECTIVE_META_CHAR);
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR_ANYTIME,
(OnigCodePoint )'%');
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {
fprintf(stderr, "search fail\n");
}
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
onig_free(reg);
onig_end();
return 0;
}

Some files were not shown because too many files have changed in this diff Show More