mirror of
https://github.com/kingToolbox/WindTerm.git
synced 2025-01-13 05:30:06 +08:00
Add iterator to search gap buffer and wildcard matching, whole word matching.
This commit is contained in:
parent
432b7676da
commit
855a3e54d3
@ -32,20 +32,25 @@
|
||||
|
||||
OnigEncodingType OnigEncodingASCII = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"US-ASCII", /* name */
|
||||
1, /* max byte length */
|
||||
1, /* min byte length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
onigenc_ascii_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -54,12 +54,24 @@ big5_mbc_enc_len(const UChar* p)
|
||||
return EncLen_BIG5[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
big5_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
return EncLen_BIG5[ONIG_CHARAT(p)];
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
big5_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
big5_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code_se(it, ONIG_ENCODING_BIG5, p, end);
|
||||
}
|
||||
|
||||
static int
|
||||
big5_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
@ -74,6 +86,14 @@ big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
static int
|
||||
big5_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end,
|
||||
UChar* lower)
|
||||
{
|
||||
return onigenc_mbn_mbc_case_fold_se(it, ONIG_ENCODING_BIG5, flag,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
big5_is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -134,6 +154,29 @@ big5_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
big5_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
OnigPosition p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return s;
|
||||
p = s;
|
||||
|
||||
if (BIG5_ISMB_TRAIL(ONIG_CHARAT(p))) {
|
||||
while (p > start) {
|
||||
if (! BIG5_ISMB_FIRST(ONIG_CHARAT(--p))) {
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
len = enclen_se(it, ONIG_ENCODING_BIG5, p);
|
||||
if (p + len > s) return p;
|
||||
p += len;
|
||||
return (p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -144,20 +187,25 @@ big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
|
||||
OnigEncodingType OnigEncodingBIG5 = {
|
||||
big5_mbc_enc_len,
|
||||
big5_mbc_enc_len_se,
|
||||
"Big5", /* name */
|
||||
2, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
big5_mbc_to_code,
|
||||
big5_mbc_to_code_se,
|
||||
onigenc_mb2_code_to_mbclen,
|
||||
big5_code_to_mbc,
|
||||
big5_mbc_case_fold,
|
||||
big5_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
big5_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
big5_left_adjust_char_head,
|
||||
big5_left_adjust_char_head_se,
|
||||
big5_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -115,6 +115,17 @@ cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
cp1251_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
*lower = ENC_CP1251_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
@ -182,20 +193,25 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingCP1251 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"CP1251", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
cp1251_mbc_case_fold,
|
||||
cp1251_mbc_case_fold_se,
|
||||
cp1251_apply_all_case_fold,
|
||||
cp1251_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
cp1251_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -148,6 +148,12 @@ mbc_enc_len(const UChar* p)
|
||||
return EncLen_EUCJP[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
return EncLen_EUCJP[ONIG_CHARAT(p)];
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
@ -166,6 +172,24 @@ mbc_to_code(const UChar* p, const UChar* end)
|
||||
return n;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
len = mbc_enc_len_se(it, p);
|
||||
n = (OnigCodePoint )ONIG_CHARAT(p++);
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = ONIG_CHARAT(p++);
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
@ -310,6 +334,28 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII_SE(c)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
OnigCodePoint code;
|
||||
int len;
|
||||
|
||||
code = get_lower_case(mbc_to_code_se(it, *pp, end));
|
||||
len = code_to_mbc(code, lower);
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static UChar*
|
||||
left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
@ -329,6 +375,25 @@ left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
/* In this encoding
|
||||
mb-trail bytes doesn't mix with single bytes.
|
||||
*/
|
||||
OnigPosition p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return s;
|
||||
p = s;
|
||||
|
||||
while (!eucjp_islead(ONIG_CHARAT(p)) && p > start) p--;
|
||||
len = mbc_enc_len_se(it, p);
|
||||
if (p + len > s) return p;
|
||||
p += len;
|
||||
return (p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -512,20 +577,25 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
|
||||
|
||||
OnigEncodingType OnigEncodingEUC_JP = {
|
||||
mbc_enc_len,
|
||||
mbc_enc_len_se,
|
||||
"EUC-JP", /* name */
|
||||
3, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
mbc_to_code,
|
||||
mbc_to_code_se,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
left_adjust_char_head_se,
|
||||
is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -54,12 +54,24 @@ euckr_mbc_enc_len(const UChar* p)
|
||||
return EncLen_EUCKR[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
euckr_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
return EncLen_EUCKR[ONIG_CHARAT(p)];
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
euckr_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
euckr_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code_se(it, ONIG_ENCODING_EUC_KR, p, end);
|
||||
}
|
||||
|
||||
static int
|
||||
euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
@ -74,6 +86,14 @@ euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
static int
|
||||
euckr_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end,
|
||||
UChar* lower)
|
||||
{
|
||||
return onigenc_mbn_mbc_case_fold_se(it, ONIG_ENCODING_EUC_KR, flag,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -110,6 +130,25 @@ euckr_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
euckr_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
/* Assumed in this encoding,
|
||||
mb-trail bytes don't mix with single bytes.
|
||||
*/
|
||||
OnigPosition p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return s;
|
||||
p = s;
|
||||
|
||||
while (!euckr_islead(ONIG_CHARAT(p)) && p > start) p--;
|
||||
len = enclen_se(it, ONIG_ENCODING_EUC_KR, p);
|
||||
if (p + len > s) return p;
|
||||
p += len;
|
||||
return (p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -120,20 +159,25 @@ euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
|
||||
OnigEncodingType OnigEncodingEUC_KR = {
|
||||
euckr_mbc_enc_len,
|
||||
euckr_mbc_enc_len_se,
|
||||
"EUC-KR", /* name */
|
||||
2, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
euckr_mbc_to_code,
|
||||
euckr_mbc_to_code_se,
|
||||
onigenc_mb2_code_to_mbclen,
|
||||
euckr_code_to_mbc,
|
||||
euckr_mbc_case_fold,
|
||||
euckr_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
euckr_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
euckr_left_adjust_char_head,
|
||||
euckr_left_adjust_char_head_se,
|
||||
euckr_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
@ -141,20 +185,25 @@ OnigEncodingType OnigEncodingEUC_KR = {
|
||||
/* Same with OnigEncodingEUC_KR except the name */
|
||||
OnigEncodingType OnigEncodingEUC_CN = {
|
||||
euckr_mbc_enc_len,
|
||||
euckr_mbc_enc_len_se,
|
||||
"EUC-CN", /* name */
|
||||
2, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
euckr_mbc_to_code,
|
||||
euckr_mbc_to_code_se,
|
||||
onigenc_mb2_code_to_mbclen,
|
||||
euckr_code_to_mbc,
|
||||
euckr_mbc_case_fold,
|
||||
euckr_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
euckr_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
euckr_left_adjust_char_head,
|
||||
euckr_left_adjust_char_head_se,
|
||||
euckr_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -54,12 +54,24 @@ euctw_mbc_enc_len(const UChar* p)
|
||||
return EncLen_EUCTW[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
return EncLen_EUCTW[ONIG_CHARAT(p)];
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
euctw_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
euctw_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code_se(it, ONIG_ENCODING_EUC_TW, p, end);
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
@ -74,6 +86,14 @@ euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end,
|
||||
UChar* lower)
|
||||
{
|
||||
return onigenc_mbn_mbc_case_fold_se(it, ONIG_ENCODING_EUC_TW, flag,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
@ -101,6 +121,25 @@ euctw_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
euctw_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
/* Assumed in this encoding,
|
||||
mb-trail bytes don't mix with single bytes.
|
||||
*/
|
||||
OnigPosition p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return s;
|
||||
p = s;
|
||||
|
||||
while (!euctw_islead(ONIG_CHARAT(p)) && p > start) p--;
|
||||
len = enclen_se(it, ONIG_ENCODING_EUC_TW, p);
|
||||
if (p + len > s) return p;
|
||||
p += len;
|
||||
return (p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -111,20 +150,25 @@ euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
|
||||
OnigEncodingType OnigEncodingEUC_TW = {
|
||||
euctw_mbc_enc_len,
|
||||
euctw_mbc_enc_len_se,
|
||||
"EUC-TW", /* name */
|
||||
4, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
euctw_mbc_to_code,
|
||||
euctw_mbc_to_code_se,
|
||||
onigenc_mb4_code_to_mbclen,
|
||||
euctw_code_to_mbc,
|
||||
euctw_mbc_case_fold,
|
||||
euctw_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
euctw_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
euctw_left_adjust_char_head,
|
||||
euctw_left_adjust_char_head_se,
|
||||
euctw_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -75,12 +75,34 @@ gb18030_mbc_enc_len(const UChar* p)
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int
|
||||
gb18030_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
UChar c0, c1;
|
||||
|
||||
c0 = ONIG_CHARAT(p);
|
||||
if (GB18030_MAP[c0] != CM)
|
||||
return 1;
|
||||
c1 = ONIG_CHARAT(p+1);
|
||||
if (GB18030_MAP[c1] == C4)
|
||||
return 4;
|
||||
if (GB18030_MAP[c1] == C1)
|
||||
return 1; /* illegal sequence */
|
||||
return 2;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
gb18030_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
gb18030_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code_se(it, ONIG_ENCODING_GB18030, p, end);
|
||||
}
|
||||
|
||||
static int
|
||||
gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
@ -95,6 +117,14 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
static int
|
||||
gb18030_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end,
|
||||
UChar* lower)
|
||||
{
|
||||
return onigenc_mbn_mbc_case_fold_se(it, ONIG_ENCODING_GB18030, flag,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -469,6 +499,333 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )s; /* never come here. (escape warning) */
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
gb18030_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
OnigPosition p;
|
||||
UChar c;
|
||||
enum state state = S_START;
|
||||
|
||||
DEBUG_GB18030(("----------------\n"));
|
||||
for (p = s; p >= start; p--) {
|
||||
c = ONIG_CHARAT(p);
|
||||
DEBUG_GB18030(("state %d --(%02x)-->\n", state, c));
|
||||
switch (state) {
|
||||
case S_START:
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
return s;
|
||||
case C2:
|
||||
state = S_one_C2; /* C2 */
|
||||
break;
|
||||
case C4:
|
||||
state = S_one_C4; /* C4 */
|
||||
break;
|
||||
case CM:
|
||||
state = S_one_CM; /* CM */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_C2: /* C2 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return s;
|
||||
case CM:
|
||||
state = S_odd_CM_one_CX; /* CM C2 */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_C4: /* C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return s;
|
||||
case CM:
|
||||
state = S_one_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_CM: /* CM */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return s;
|
||||
case C4:
|
||||
state = S_odd_C4CM;
|
||||
break;
|
||||
case CM:
|
||||
state = S_odd_CM_one_CX; /* CM CM */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 1);
|
||||
case CM:
|
||||
state = S_even_CM_one_CX;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return s;
|
||||
case CM:
|
||||
state = S_odd_CM_one_CX;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_one_CMC4: /* CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (s - 1);
|
||||
case C4:
|
||||
state = S_one_C4_odd_CMC4; /* C4 CM C4 */
|
||||
break;
|
||||
case CM:
|
||||
state = S_even_CM_one_CX; /* CM CM C4 */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (s - 1);
|
||||
case C4:
|
||||
state = S_one_C4_odd_CMC4;
|
||||
break;
|
||||
case CM:
|
||||
state = S_odd_CM_odd_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_C4_odd_CMC4: /* C4 CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 1);
|
||||
case CM:
|
||||
state = S_even_CMC4; /* CM C4 CM C4 */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_CMC4: /* CM C4 CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (s - 3);
|
||||
case C4:
|
||||
state = S_one_C4_even_CMC4;
|
||||
break;
|
||||
case CM:
|
||||
state = S_odd_CM_even_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 3);
|
||||
case CM:
|
||||
state = S_odd_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 3);
|
||||
case CM:
|
||||
state = S_even_CM_odd_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 1);
|
||||
case CM:
|
||||
state = S_odd_CM_odd_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 1);
|
||||
case CM:
|
||||
state = S_even_CM_even_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 3);
|
||||
case CM:
|
||||
state = S_odd_CM_even_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return s;
|
||||
case CM:
|
||||
state = S_one_CM_odd_C4CM; /* CM C4 CM */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (s - 2); /* |CM C4 CM */
|
||||
case C4:
|
||||
state = S_even_C4CM;
|
||||
break;
|
||||
case CM:
|
||||
state = S_even_CM_odd_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_C4CM: /* C4 CM C4 CM */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 2); /* C4|CM C4 CM */
|
||||
case CM:
|
||||
state = S_one_CM_even_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (s - 0); /*|CM C4 CM C4|CM */
|
||||
case C4:
|
||||
state = S_odd_C4CM;
|
||||
break;
|
||||
case CM:
|
||||
state = S_even_CM_even_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_even_CM_odd_C4CM: /* CM CM C4 CM */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 0); /* |CM CM|C4|CM */
|
||||
case CM:
|
||||
state = S_odd_CM_odd_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 2); /* |CM CM|CM C4 CM */
|
||||
case CM:
|
||||
state = S_even_CM_odd_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 2); /* |CM CM|C4|CM C4 CM */
|
||||
case CM:
|
||||
state = S_odd_CM_even_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
|
||||
switch (GB18030_MAP[c]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (s - 0); /* |CM CM|CM C4 CM C4|CM */
|
||||
case CM:
|
||||
state = S_even_CM_even_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_GB18030(("state %d\n", state));
|
||||
switch (state) {
|
||||
case S_START: return (s - 0);
|
||||
case S_one_C2: return (s - 0);
|
||||
case S_one_C4: return (s - 0);
|
||||
case S_one_CM: return (s - 0);
|
||||
|
||||
case S_odd_CM_one_CX: return (s - 1);
|
||||
case S_even_CM_one_CX: return (s - 0);
|
||||
|
||||
case S_one_CMC4: return (s - 1);
|
||||
case S_odd_CMC4: return (s - 1);
|
||||
case S_one_C4_odd_CMC4: return (s - 1);
|
||||
case S_even_CMC4: return (s - 3);
|
||||
case S_one_C4_even_CMC4: return (s - 3);
|
||||
|
||||
case S_odd_CM_odd_CMC4: return (s - 3);
|
||||
case S_even_CM_odd_CMC4: return (s - 1);
|
||||
|
||||
case S_odd_CM_even_CMC4: return (s - 1);
|
||||
case S_even_CM_even_CMC4: return (s - 3);
|
||||
|
||||
case S_odd_C4CM: return (s - 0);
|
||||
case S_one_CM_odd_C4CM: return (s - 2);
|
||||
case S_even_C4CM: return (s - 2);
|
||||
case S_one_CM_even_C4CM: return (s - 0);
|
||||
|
||||
case S_even_CM_odd_C4CM: return (s - 0);
|
||||
case S_odd_CM_odd_C4CM: return (s - 2);
|
||||
case S_even_CM_even_C4CM: return (s - 2);
|
||||
case S_odd_CM_even_C4CM: return (s - 0);
|
||||
}
|
||||
|
||||
return s; /* never come here. (escape warning) */
|
||||
}
|
||||
|
||||
static int
|
||||
gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -477,20 +834,25 @@ gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
|
||||
OnigEncodingType OnigEncodingGB18030 = {
|
||||
gb18030_mbc_enc_len,
|
||||
gb18030_mbc_enc_len_se,
|
||||
"GB18030", /* name */
|
||||
4, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
gb18030_mbc_to_code,
|
||||
gb18030_mbc_to_code_se,
|
||||
onigenc_mb4_code_to_mbclen,
|
||||
gb18030_code_to_mbc,
|
||||
gb18030_mbc_case_fold,
|
||||
gb18030_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
gb18030_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
gb18030_left_adjust_char_head,
|
||||
gb18030_left_adjust_char_head_se,
|
||||
gb18030_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -216,6 +216,24 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp,
|
||||
OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -254,20 +272,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_1 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-1", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_10_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -221,20 +239,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_10 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-10", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -78,20 +78,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_11 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-11", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_13_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -210,20 +228,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_13 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-13", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_14_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -223,20 +241,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_14 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-14", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_15_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -217,20 +235,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_15 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-15", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_16_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -219,20 +237,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_16 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-16", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_2_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -217,20 +235,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_2 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-2", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp,
|
||||
OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_3_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -217,20 +235,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_3 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-3", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_4_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -219,20 +237,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_4 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-4", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -114,6 +114,17 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
*lower = ENC_ISO_8859_5_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -208,20 +219,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_5 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-5", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -78,20 +78,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_6 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-6", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -114,6 +114,17 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
*lower = ENC_ISO_8859_7_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -204,20 +215,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_7 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-7", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -78,20 +78,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_8 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-8", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_mbc_case_fold_se,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_9_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -210,20 +228,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_9 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"ISO-8859-9", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -115,6 +115,17 @@ koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
koi8_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
*lower = ENC_KOI8_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
|
||||
@ -232,20 +243,25 @@ koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingKOI8 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"KOI8", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
koi8_mbc_case_fold,
|
||||
koi8_mbc_case_fold_se,
|
||||
koi8_apply_all_case_fold,
|
||||
koi8_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
koi8_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -114,6 +114,17 @@ koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
koi8_r_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
*lower = ENC_KOI8_R_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -194,20 +205,25 @@ koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingKOI8_R = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
onigenc_single_byte_mbc_enc_len_se,
|
||||
"KOI8-R", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_mbc_to_code_se,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
koi8_r_mbc_case_fold,
|
||||
koi8_r_mbc_case_fold_se,
|
||||
koi8_r_apply_all_case_fold,
|
||||
koi8_r_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
koi8_r_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_single_byte_left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -174,6 +174,12 @@ mbc_enc_len(const UChar* p)
|
||||
return EncLen_SJIS[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
return EncLen_SJIS[ONIG_CHARAT(p)];
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
@ -212,6 +218,25 @@ mbc_to_code(const UChar* p, const UChar* end)
|
||||
return n;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
len = mbc_enc_len_se(it, p);
|
||||
c = ONIG_CHARAT(p++);
|
||||
n = c;
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = ONIG_CHARAT(p++);
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
@ -309,7 +334,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
@ -329,6 +354,28 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigPosition* pp, OnigPosition end, UChar* lower)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII_SE(c)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
OnigCodePoint code;
|
||||
int len;
|
||||
|
||||
code = get_lower_case(mbc_to_code_se(it, *pp, end));
|
||||
len = code_to_mbc(code, lower);
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -377,6 +424,29 @@ left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
OnigPosition p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return s;
|
||||
p = s;
|
||||
|
||||
if (SJIS_ISMB_TRAIL(ONIG_CHARAT(p))) {
|
||||
while (p > start) {
|
||||
if (! SJIS_ISMB_FIRST(ONIG_CHARAT(--p))) {
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
len = mbc_enc_len_se(it, p);
|
||||
if (p + len > s) return p;
|
||||
p += len;
|
||||
return (p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -531,40 +601,50 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
|
||||
#ifdef ENC_CP932
|
||||
OnigEncodingType OnigEncodingCP932 = {
|
||||
mbc_enc_len,
|
||||
mbc_enc_len_se,
|
||||
"CP932", /* name */
|
||||
2, /* max byte length */
|
||||
1, /* min byte length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
mbc_to_code,
|
||||
mbc_to_code_se,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
left_adjust_char_head_se,
|
||||
is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
#else
|
||||
OnigEncodingType OnigEncodingSJIS = {
|
||||
mbc_enc_len,
|
||||
mbc_enc_len_se,
|
||||
"Shift_JIS", /* name */
|
||||
2, /* max byte length */
|
||||
1, /* min byte length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_is_mbc_newline_0x0a_se,
|
||||
mbc_to_code,
|
||||
mbc_to_code_se,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
left_adjust_char_head_se,
|
||||
is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
@ -108,6 +108,7 @@ typedef struct {
|
||||
|
||||
/*
 * Element count of a true array (not a pointer), converted to int.  The
 * whole expansion is parenthesized so the macro behaves as a single operand
 * wherever it is used (e.g. as the operand of sizeof or another unary
 * operator).
 */
#define numberof(array) ((int)(sizeof(array) / sizeof((array)[0])))
/* Entry counts of the CodeRanges and CodeScripts tables. */
#define CODE_RANGES_NUM numberof(CodeRanges)
#define CODE_SCRIPTS_NUM numberof(CodeScripts)
|
||||
|
||||
extern int
|
||||
onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
@ -148,6 +149,17 @@ onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
|
||||
return onigenc_unicode_ctype_code_range(ctype, ranges);
|
||||
}
|
||||
|
||||
/*
 * Finds the script table that contains `code`.
 *
 * Walks CodeScripts front to back and returns the first entry for which
 * onig_is_in_code_range() reports a hit; returns 0 (a null pointer) when no
 * entry matches.
 */
extern const OnigCodePoint*
onigenc_unicode_code_script(OnigCodePoint code)
{
  const OnigCodePoint* ranges;
  int idx = 0;

  while (idx < CODE_SCRIPTS_NUM) {
    ranges = CodeScripts[idx++];
    if (onig_is_in_code_range((UChar*) ranges, code)) {
      return ranges;
    }
  }
  return 0;
}
|
||||
|
||||
#include "st.h"
|
||||
|
||||
#define PROPERTY_NAME_MAX_SIZE (MAX_WORD_LENGTH + 1)
|
||||
@ -220,12 +232,19 @@ static struct st_hash_type type_code3_hash = {
|
||||
code3_hash,
|
||||
};
|
||||
|
||||
|
||||
/*
 * Case-fold lookup state for this file.  CaseFoldInited is compared against
 * 0 before init_case_fold_table() is called by the folding routines.
 *
 * With USE_SHARED_UNICODE_TABLE defined the objects get external linkage;
 * otherwise they stay private to this translation unit.  Only this one
 * conditional set of definitions may exist: an additional unconditional
 * `static` set would define CaseFoldInited twice and, in the shared
 * configuration, redeclare static objects as non-static.
 */
#ifdef USE_SHARED_UNICODE_TABLE
st_table* FoldTable;    /* fold-1, fold-2, fold-3 */
st_table* Unfold1Table;
st_table* Unfold2Table;
st_table* Unfold3Table;
int CaseFoldInited = 0;
#else
static st_table* FoldTable;    /* fold-1, fold-2, fold-3 */
static st_table* Unfold1Table;
static st_table* Unfold2Table;
static st_table* Unfold3Table;
static int CaseFoldInited = 0;
#endif /* USE_SHARED_UNICODE_TABLE */
|
||||
|
||||
static int init_case_fold_table(void)
|
||||
{
|
||||
@ -338,6 +357,59 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
|
||||
return len;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_mbc_case_fold_se(OnigIterator* it, OnigEncoding enc,
|
||||
OnigCaseFoldType flag ARG_UNUSED, OnigPosition* pp, OnigPosition end,
|
||||
UChar* fold)
|
||||
{
|
||||
CodePointList3 *to;
|
||||
OnigCodePoint code;
|
||||
int i, len, rlen;
|
||||
OnigPosition p = *pp;
|
||||
|
||||
if (CaseFoldInited == 0) init_case_fold_table();
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE_SE(it, enc, p, end);
|
||||
len = enclen_se(it, enc, p);
|
||||
*pp += len;
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (code == 0x0049) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
|
||||
}
|
||||
else if (code == 0x0130) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
|
||||
if (to->n == 1) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold);
|
||||
}
|
||||
#if 0
|
||||
/* NO NEEDS TO CHECK */
|
||||
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
#else
|
||||
else {
|
||||
#endif
|
||||
rlen = 0;
|
||||
for (i = 0; i < to->n; i++) {
|
||||
len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold);
|
||||
fold += len;
|
||||
rlen += len;
|
||||
}
|
||||
return rlen;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
*fold++ = ONIG_CHARAT(p++);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
|
@ -25940,6 +25940,112 @@ static const OnigCodePoint* const CodeRanges[] = {
|
||||
CR_In_No_Block,
|
||||
#endif /* USE_UNICODE_PROPERTIES */
|
||||
};
|
||||
|
||||
static const OnigCodePoint* const CodeScripts[] = {
|
||||
CR_Common,
|
||||
CR_Latin,
|
||||
CR_Greek,
|
||||
CR_Cyrillic,
|
||||
CR_Armenian,
|
||||
CR_Hebrew,
|
||||
CR_Arabic,
|
||||
CR_Syriac,
|
||||
CR_Thaana,
|
||||
CR_Devanagari,
|
||||
CR_Bengali,
|
||||
CR_Gurmukhi,
|
||||
CR_Gujarati,
|
||||
CR_Oriya,
|
||||
CR_Tamil,
|
||||
CR_Telugu,
|
||||
CR_Kannada,
|
||||
CR_Malayalam,
|
||||
CR_Sinhala,
|
||||
CR_Thai,
|
||||
CR_Lao,
|
||||
CR_Tibetan,
|
||||
CR_Myanmar,
|
||||
CR_Georgian,
|
||||
CR_Hangul,
|
||||
CR_Ethiopic,
|
||||
CR_Cherokee,
|
||||
CR_Canadian_Aboriginal,
|
||||
CR_Ogham,
|
||||
CR_Runic,
|
||||
CR_Khmer,
|
||||
CR_Mongolian,
|
||||
CR_Hiragana,
|
||||
CR_Katakana,
|
||||
CR_Bopomofo,
|
||||
CR_Han,
|
||||
CR_Yi,
|
||||
CR_Old_Italic,
|
||||
CR_Gothic,
|
||||
CR_Deseret,
|
||||
CR_Inherited,
|
||||
CR_Tagalog,
|
||||
CR_Hanunoo,
|
||||
CR_Buhid,
|
||||
CR_Tagbanwa,
|
||||
CR_Limbu,
|
||||
CR_Tai_Le,
|
||||
CR_Linear_B,
|
||||
CR_Ugaritic,
|
||||
CR_Shavian,
|
||||
CR_Osmanya,
|
||||
CR_Cypriot,
|
||||
CR_Braille,
|
||||
CR_Buginese,
|
||||
CR_Coptic,
|
||||
CR_New_Tai_Lue,
|
||||
CR_Glagolitic,
|
||||
CR_Tifinagh,
|
||||
CR_Syloti_Nagri,
|
||||
CR_Old_Persian,
|
||||
CR_Kharoshthi,
|
||||
CR_Balinese,
|
||||
CR_Cuneiform,
|
||||
CR_Phoenician,
|
||||
CR_Phags_Pa,
|
||||
CR_Nko,
|
||||
CR_Sundanese,
|
||||
CR_Lepcha,
|
||||
CR_Ol_Chiki,
|
||||
CR_Vai,
|
||||
CR_Saurashtra,
|
||||
CR_Kayah_Li,
|
||||
CR_Rejang,
|
||||
CR_Lycian,
|
||||
CR_Carian,
|
||||
CR_Lydian,
|
||||
CR_Cham,
|
||||
CR_Tai_Tham,
|
||||
CR_Tai_Viet,
|
||||
CR_Avestan,
|
||||
CR_Egyptian_Hieroglyphs,
|
||||
CR_Samaritan,
|
||||
CR_Lisu,
|
||||
CR_Bamum,
|
||||
CR_Javanese,
|
||||
CR_Meetei_Mayek,
|
||||
CR_Imperial_Aramaic,
|
||||
CR_Old_South_Arabian,
|
||||
CR_Inscriptional_Parthian,
|
||||
CR_Inscriptional_Pahlavi,
|
||||
CR_Old_Turkic,
|
||||
CR_Kaithi,
|
||||
CR_Batak,
|
||||
CR_Brahmi,
|
||||
CR_Mandaic,
|
||||
CR_Chakma,
|
||||
CR_Meroitic_Cursive,
|
||||
CR_Meroitic_Hieroglyphs,
|
||||
CR_Miao,
|
||||
CR_Sharada,
|
||||
CR_Sora_Sompeng,
|
||||
CR_Takri
|
||||
};
|
||||
|
||||
/* One name -> ctype association; both members are plain ints. */
struct uniname2ctype_struct {
  int name;
  int ctype;
};
|
||||
|
@ -54,6 +54,12 @@ utf16be_mbc_enc_len(const UChar* p)
|
||||
return EncLen_UTF16[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
return EncLen_UTF16[ONIG_CHARAT(p)];
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
@ -71,6 +77,26 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
if (p + 1 < end) {
|
||||
const UChar c0 = ONIG_CHARAT(p);
|
||||
const UChar c1 = ONIG_CHARAT(p+1);
|
||||
|
||||
if (c1 == 0x0a && c0 == 0x00)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((c1 == 0x0b || c1 == 0x0c || c1 == 0x0d || c1 == 0x85)
|
||||
&& c0 == 0x00)
|
||||
return 1;
|
||||
if (c0 == 0x20 && (c1 == 0x29 || c1 == 0x28))
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -87,6 +113,24 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
return code;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf16be_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
const UChar c0 = ONIG_CHARAT(p);
|
||||
const UChar c1 = ONIG_CHARAT(p+1);
|
||||
|
||||
if (UTF16_IS_SURROGATE_FIRST(c0)) {
|
||||
code = ((((c0 - 0xd8) << 2) + ((c1 & 0xc0) >> 6) + 1) << 16)
|
||||
+ ((((c1 & 0x3f) << 2) + (ONIG_CHARAT(p+2) - 0xdc)) << 8)
|
||||
+ ONIG_CHARAT(p+3);
|
||||
}
|
||||
else {
|
||||
code = c0 * 256 + c1;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
@ -145,6 +189,35 @@ utf16be_mbc_case_fold(OnigCaseFoldType flag,
|
||||
pp, end, fold);
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end, UChar* fold)
|
||||
{
|
||||
const UChar c0 = ONIG_CHARAT(*pp);
|
||||
const UChar c1 = ONIG_CHARAT(*pp+1);
|
||||
|
||||
if (ONIGENC_IS_ASCII_CODE(c1) && c0 == 0) {
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (c1 == 0x49) {
|
||||
*fold++ = 0x01;
|
||||
*fold = 0x31;
|
||||
(*pp) += 2;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold++ = 0;
|
||||
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c1);
|
||||
*pp += 2;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF16_BE, flag,
|
||||
pp, end, fold);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -194,6 +267,21 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
utf16be_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
if (s <= start) return s;
|
||||
|
||||
if ((s - start) % 2 == 1) {
|
||||
s--;
|
||||
}
|
||||
|
||||
if (UTF16_IS_SURROGATE_SECOND(ONIG_CHARAT(s)) && s > start + 1)
|
||||
s -= 2;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
@ -204,20 +292,25 @@ utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingUTF16_BE = {
|
||||
utf16be_mbc_enc_len,
|
||||
utf16be_mbc_enc_len_se,
|
||||
"UTF-16BE", /* name */
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
utf16be_is_mbc_newline,
|
||||
utf16be_is_mbc_newline_se,
|
||||
utf16be_mbc_to_code,
|
||||
utf16be_mbc_to_code_se,
|
||||
utf16be_code_to_mbclen,
|
||||
utf16be_code_to_mbc,
|
||||
utf16be_mbc_case_fold,
|
||||
utf16be_mbc_case_fold_se,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
utf16be_get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf16be_left_adjust_char_head,
|
||||
utf16be_left_adjust_char_head_se,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
||||
|
@ -60,6 +60,12 @@ utf16le_mbc_enc_len(const UChar* p)
|
||||
return EncLen_UTF16[*(p+1)];
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
return EncLen_UTF16[ONIG_CHARAT(p+1)];
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
@ -77,6 +83,26 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
if (p + 1 < end) {
|
||||
const UChar c0 = ONIG_CHARAT(p);
|
||||
const UChar c1 = ONIG_CHARAT(p+1);
|
||||
|
||||
if (c0 == 0x0a && c1 == 0x00)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((c0 == 0x0b || c0 == 0x0c || c0 == 0x0d || c0 == 0x85)
|
||||
&& c1 == 0x00)
|
||||
return 1;
|
||||
if (c1 == 0x20 && (c0 == 0x29 || c0 == 0x28))
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -95,6 +121,24 @@ utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
return code;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf16le_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
const UChar c0 = ONIG_CHARAT(p);
|
||||
const UChar c1 = ONIG_CHARAT(p+1);
|
||||
|
||||
if (UTF16_IS_SURROGATE_FIRST(c1)) {
|
||||
code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)
|
||||
+ ((((c0 & 0x3f) << 2) + (ONIG_CHARAT(p+3) - 0xdc)) << 8)
|
||||
+ ONIG_CHARAT(p+2);
|
||||
}
|
||||
else {
|
||||
code = c1 * 256 + c0;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
@ -147,6 +191,34 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag,
|
||||
fold);
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end, UChar* fold)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (ONIGENC_IS_ASCII_CODE(c) && ONIG_CHARAT(*pp+1) == 0) {
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (c == 0x49) {
|
||||
*fold++ = 0x31;
|
||||
*fold = 0x01;
|
||||
(*pp) += 2;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
|
||||
*fold = 0;
|
||||
*pp += 2;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF16_LE, flag, pp, end,
|
||||
fold);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
|
||||
@ -195,6 +267,21 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
utf16le_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
if (s <= start) return s;
|
||||
|
||||
if ((s - start) % 2 == 1) {
|
||||
s--;
|
||||
}
|
||||
|
||||
if (UTF16_IS_SURROGATE_SECOND(ONIG_CHARAT(s+1)) && s > start + 1)
|
||||
s -= 2;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
@ -205,20 +292,25 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingUTF16_LE = {
|
||||
utf16le_mbc_enc_len,
|
||||
utf16le_mbc_enc_len_se,
|
||||
"UTF-16LE", /* name */
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
utf16le_is_mbc_newline,
|
||||
utf16le_is_mbc_newline_se,
|
||||
utf16le_mbc_to_code,
|
||||
utf16le_mbc_to_code_se,
|
||||
utf16le_code_to_mbclen,
|
||||
utf16le_code_to_mbc,
|
||||
utf16le_mbc_case_fold,
|
||||
utf16le_mbc_case_fold_se,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
utf16le_get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf16le_left_adjust_char_head,
|
||||
utf16le_left_adjust_char_head_se,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
||||
|
@ -35,6 +35,12 @@ utf32be_mbc_enc_len(const UChar* p ARG_UNUSED)
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_mbc_enc_len_se(OnigIterator* it ARG_UNUSED, OnigPosition p ARG_UNUSED)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
@ -53,12 +59,39 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
if (p + 3 < end) {
|
||||
const UChar c2 = ONIG_CHARAT(p+2);
|
||||
const UChar c3 = ONIG_CHARAT(p+3);
|
||||
|
||||
if (c3 == 0x0a && c2 == 0 && ONIG_CHARAT(p+1) == 0 && ONIG_CHARAT(p) == 0)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((c3 == 0x0b || c3 == 0x0c || c3 == 0x0d || c3 == 0x85)
|
||||
&& c2 == 0 && ONIG_CHARAT(p+1) == 0 && ONIG_CHARAT(p) == 0x00)
|
||||
return 1;
|
||||
if (c2 == 0x20 && (c3 == 0x29 || c3 == 0x28)
|
||||
&& ONIG_CHARAT(p+1) == 0 && ONIG_CHARAT(p) == 0)
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf32be_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(((ONIG_CHARAT(p) * 256 + ONIG_CHARAT(p+1)) * 256 + ONIG_CHARAT(p+2)) * 256 + ONIG_CHARAT(p+3));
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
|
||||
{
|
||||
@ -108,6 +141,38 @@ utf32be_mbc_case_fold(OnigCaseFoldType flag,
|
||||
fold);
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end, UChar* fold)
|
||||
{
|
||||
OnigPosition p = *pp;
|
||||
const UChar c3 = ONIG_CHARAT(p+3);
|
||||
|
||||
if (ONIGENC_IS_ASCII_CODE(c3) && ONIG_CHARAT(p+2) == 0 && ONIG_CHARAT(p+1) == 0 && ONIG_CHARAT(p) == 0) {
|
||||
*fold++ = 0;
|
||||
*fold++ = 0;
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (c3 == 0x49) {
|
||||
*fold++ = 0x01;
|
||||
*fold = 0x31;
|
||||
(*pp) += 4;
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold++ = 0;
|
||||
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c3);
|
||||
*pp += 4;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF32_BE, flag, pp, end,
|
||||
fold);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -152,6 +217,17 @@ utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )(s - rem);
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
utf32be_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
int rem;
|
||||
|
||||
if (s <= start) return s;
|
||||
|
||||
rem = (int )((s - start) % 4);
|
||||
return (s - rem);
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
@ -162,20 +238,25 @@ utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingUTF32_BE = {
|
||||
utf32be_mbc_enc_len,
|
||||
utf32be_mbc_enc_len_se,
|
||||
"UTF-32BE", /* name */
|
||||
4, /* max byte length */
|
||||
4, /* min byte length */
|
||||
utf32be_is_mbc_newline,
|
||||
utf32be_is_mbc_newline_se,
|
||||
utf32be_mbc_to_code,
|
||||
utf32be_mbc_to_code_se,
|
||||
utf32be_code_to_mbclen,
|
||||
utf32be_code_to_mbc,
|
||||
utf32be_mbc_case_fold,
|
||||
utf32be_mbc_case_fold_se,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
utf32be_get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf32be_left_adjust_char_head,
|
||||
utf32be_left_adjust_char_head_se,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
||||
|
@ -35,6 +35,12 @@ utf32le_mbc_enc_len(const UChar* p ARG_UNUSED)
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_mbc_enc_len_se(OnigIterator* it ARG_UNUSED, OnigPosition p ARG_UNUSED)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
@ -43,7 +49,7 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((*p == 0x0b ||*p == 0x0c ||*p == 0x0d || *p == 0x85)
|
||||
&& *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00)
|
||||
&& *(p+1) == 0x00 && *(p+2) == 0x00 && *(p+3) == 0x00)
|
||||
return 1;
|
||||
if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
|
||||
&& *(p+2) == 0x00 && *(p+3) == 0x00)
|
||||
@ -53,12 +59,39 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
if (p + 3 < end) {
|
||||
const UChar c0 = ONIG_CHARAT(p);
|
||||
const UChar c1 = ONIG_CHARAT(p+1);
|
||||
|
||||
if (c0 == 0x0a && c1 == 0 && ONIG_CHARAT(p+2) == 0 && ONIG_CHARAT(p+3) == 0)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((c0 == 0x0b || c0 == 0x0c || c0 == 0x0d || c0 == 0x85)
|
||||
&& c1 == 0x00 && ONIG_CHARAT(p+2) == 0x00 && ONIG_CHARAT(p+3) == 0x00)
|
||||
return 1;
|
||||
if (c1 == 0x20 && (c0 == 0x29 || c0 == 0x28)
|
||||
&& ONIG_CHARAT(p+2) == 0x00 && ONIG_CHARAT(p+3) == 0x00)
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf32le_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(((ONIG_CHARAT(p+3) * 256 + ONIG_CHARAT(p+2)) * 256 + ONIG_CHARAT(p+1)) * 256 + ONIG_CHARAT(p));
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
|
||||
{
|
||||
@ -109,6 +142,39 @@ utf32le_mbc_case_fold(OnigCaseFoldType flag,
|
||||
fold);
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
|
||||
OnigPosition* pp, OnigPosition end, UChar* fold)
|
||||
{
|
||||
const OnigPosition p = *pp;
|
||||
const UChar c = ONIG_CHARAT(p);
|
||||
|
||||
if (ONIGENC_IS_ASCII_CODE(c) && ONIG_CHARAT(p+1) == 0 && ONIG_CHARAT(p+2) == 0 && ONIG_CHARAT(p+3) == 0) {
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (c == 0x49) {
|
||||
*fold++ = 0x31;
|
||||
*fold++ = 0x01;
|
||||
}
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
*fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
|
||||
*fold++ = 0;
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold++ = 0;
|
||||
*fold = 0;
|
||||
*pp += 4;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF32_LE, flag, pp, end,
|
||||
fold);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -152,6 +218,17 @@ utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )(s - rem);
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
utf32le_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
int rem;
|
||||
|
||||
if (s <= start) return s;
|
||||
|
||||
rem = (int )((s - start) % 4);
|
||||
return (s - rem);
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
@ -162,20 +239,25 @@ utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingUTF32_LE = {
|
||||
utf32le_mbc_enc_len,
|
||||
utf32le_mbc_enc_len_se,
|
||||
"UTF-32LE", /* name */
|
||||
4, /* max byte length */
|
||||
4, /* min byte length */
|
||||
utf32le_is_mbc_newline,
|
||||
utf32le_is_mbc_newline_se,
|
||||
utf32le_mbc_to_code,
|
||||
utf32le_mbc_to_code_se,
|
||||
utf32le_code_to_mbclen,
|
||||
utf32le_code_to_mbc,
|
||||
utf32le_mbc_case_fold,
|
||||
utf32le_mbc_case_fold_se,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
utf32le_get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf32le_left_adjust_char_head,
|
||||
utf32le_left_adjust_char_head_se,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
||||
|
@ -65,6 +65,12 @@ mbc_enc_len(const UChar* p)
|
||||
return EncLen_UTF8[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_enc_len_se(OnigIterator* it, OnigPosition p)
|
||||
{
|
||||
return EncLen_UTF8[ONIG_CHARAT(p)];
|
||||
}
|
||||
|
||||
static int
|
||||
is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
@ -88,6 +94,32 @@ is_mbc_newline(const UChar* p, const UChar* end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
if (p < end) {
|
||||
const UChar c0 = ONIG_CHARAT(p);
|
||||
if (c0 == 0x0a) return 1;
|
||||
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if (c0 == 0x0b || c0 == 0x0c || c0 == 0x0d) return 1;
|
||||
if (p + 1 < end) {
|
||||
const UChar c1 = ONIG_CHARAT(p+1);
|
||||
if (c1 == 0x85 && c0 == 0xc2) /* U+0085 */
|
||||
return 1;
|
||||
if (p + 2 < end) {
|
||||
const UChar c2 = ONIG_CHARAT(p+2);
|
||||
if ((c2 == 0xa8 || c2 == 0xa9)
|
||||
&& c1 == 0x80 && c0 == 0xe2) /* U+2028, U+2029 */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
@ -115,6 +147,33 @@ mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
}
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
|
||||
{
|
||||
int c, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
len = mbc_enc_len_se(it, p);
|
||||
c = ONIG_CHARAT(p++);
|
||||
if (len > 1) {
|
||||
len--;
|
||||
n = c & ((1 << (6 - len)) - 1);
|
||||
while (len--) {
|
||||
c = ONIG_CHARAT(p++);
|
||||
n = (n << 6) | (c & ((1 << 6) - 1));
|
||||
}
|
||||
return n;
|
||||
}
|
||||
else {
|
||||
#ifdef USE_INVALID_CODE_SCHEME
|
||||
if (c > 0xfd) {
|
||||
return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
|
||||
}
|
||||
#endif
|
||||
return (OnigCodePoint )c;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
@ -217,6 +276,34 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp,
|
||||
OnigPosition end, UChar* fold)
|
||||
{
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII_SE(c)) {
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (c == 0x49) {
|
||||
*fold++ = 0xc4;
|
||||
*fold = 0xb1;
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
else {
|
||||
return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF8, flag,
|
||||
pp, end, fold);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
@ -275,6 +362,18 @@ left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
return (UChar* )p;
|
||||
}
|
||||
|
||||
static OnigPosition
|
||||
left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
OnigPosition p;
|
||||
|
||||
if (s <= start) return s;
|
||||
p = s;
|
||||
|
||||
while (!utf8_islead(ONIG_CHARAT(p)) && p > start) p--;
|
||||
return p;
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
@ -285,20 +384,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
|
||||
OnigEncodingType OnigEncodingUTF8 = {
|
||||
mbc_enc_len,
|
||||
mbc_enc_len_se,
|
||||
"UTF-8", /* name */
|
||||
6, /* max byte length */
|
||||
1, /* min byte length */
|
||||
is_mbc_newline,
|
||||
is_mbc_newline_se,
|
||||
mbc_to_code,
|
||||
mbc_to_code_se,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
mbc_case_fold_se,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
left_adjust_char_head_se,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
||||
|
@ -68,9 +68,9 @@ void re_free_pattern P_((struct re_pattern_buffer*));
|
||||
ONIG_EXTERN
|
||||
int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
|
||||
ONIG_EXTERN
|
||||
int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
|
||||
int re_search P_((OnigIterator* it, struct re_pattern_buffer*, OnigPosition, OnigPosition, OnigPosition, OnigPosition, struct re_registers*));
|
||||
ONIG_EXTERN
|
||||
int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
|
||||
int re_match P_((OnigIterator* it, struct re_pattern_buffer*, OnigPosition, OnigPosition, OnigPosition, struct re_registers*));
|
||||
ONIG_EXTERN
|
||||
void re_set_casetable P_((const char*));
|
||||
ONIG_EXTERN
|
||||
|
@ -152,7 +152,7 @@ ONIG_EXTERN const char* onig_copyright P_((void));
|
||||
|
||||
|
||||
ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
|
||||
ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
|
||||
ONIG_EXTERN OnigPosition regexec P_((OnigIterator* it, regex_t* reg, OnigPosition str, size_t nmatch, regmatch_t* matches, int options));
|
||||
ONIG_EXTERN void regfree P_((regex_t* reg));
|
||||
ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
|
||||
|
||||
|
@ -102,11 +102,31 @@ extern "C" {
|
||||
typedef unsigned char OnigUChar;
|
||||
typedef unsigned int OnigCodePoint;
|
||||
typedef unsigned int OnigCtype;
|
||||
typedef size_t OnigDistance;
|
||||
typedef ptrdiff_t OnigPosition;
|
||||
|
||||
typedef size_t OnigDistance;
|
||||
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
typedef __int64 OnigPosition;
|
||||
#else
|
||||
typedef long long OnigPosition;
|
||||
#endif
|
||||
/* Sentinel position meaning "no valid position", plus predicates for it.
 * The constant and the macro arguments are parenthesized so that the macros
 * expand correctly inside larger expressions and with compound arguments
 * (for example ONIG_IS_NOT_BADPOS(a | b)). */
#define ONIG_BADPOS                 (-1)
#define ONIG_IS_BADPOS(p)           ((p) == ONIG_BADPOS)
#define ONIG_IS_NOT_BADPOS(p)       ((p) != ONIG_BADPOS)
|
||||
|
||||
typedef UChar (*OnigCharAtFunc)(OnigPosition pos, const void* ptr);
|
||||
typedef struct OnigIteratorStruct {
|
||||
OnigCharAtFunc at;
|
||||
const void* ptr;
|
||||
} OnigIterator;
|
||||
|
||||
#define ONIG_CHARAT(pos) (it->at(pos, it->ptr))
|
||||
|
||||
/* Iterator API */
|
||||
ONIG_EXTERN
|
||||
UChar onig_default_charat P_((OnigPosition pos, const void* ptr));
|
||||
|
||||
typedef unsigned int OnigCaseFoldType; /* case fold flag */
|
||||
|
||||
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
|
||||
@ -148,20 +168,25 @@ typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, i
|
||||
|
||||
typedef struct OnigEncodingTypeST {
|
||||
int (*mbc_enc_len)(const OnigUChar* p);
|
||||
int (*mbc_enc_len_se)(OnigIterator* it, OnigPosition p);
|
||||
const char* name;
|
||||
int max_enc_len;
|
||||
int min_enc_len;
|
||||
int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
|
||||
int (*is_mbc_newline_se)(OnigIterator* it, OnigPosition p, OnigPosition end);
|
||||
OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
|
||||
OnigCodePoint (*mbc_to_code_se)(OnigIterator* it, OnigPosition p, OnigPosition end);
|
||||
int (*code_to_mbclen)(OnigCodePoint code);
|
||||
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
|
||||
int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
|
||||
int (*mbc_case_fold_se)(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end, OnigUChar* to);
|
||||
int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
|
||||
int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]);
|
||||
int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
|
||||
int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype);
|
||||
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]);
|
||||
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
|
||||
OnigPosition (*left_adjust_char_head_se)(OnigIterator* it, OnigPosition start, OnigPosition p);
|
||||
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
|
||||
unsigned int flags;
|
||||
} OnigEncodingType;
|
||||
@ -270,37 +295,57 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
|
||||
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
|
||||
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
|
||||
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
|
||||
#define ONIGENC_IS_MBC_HEAD_SE(it,enc,p) (ONIGENC_MBC_ENC_LEN_SE(it,enc,p) != 1)
|
||||
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
|
||||
/* True when the already-fetched byte value c is in the ASCII range.  The
 * argument is parenthesized so compound expressions compare as a whole. */
#define ONIGENC_IS_MBC_ASCII_SE(c)     ((c) < 128)
|
||||
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
|
||||
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
|
||||
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
|
||||
#define ONIGENC_IS_MBC_WORD_SE(it,enc,s,end) \
|
||||
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE_SE(it,enc,s,end))
|
||||
#define ONIGENC_IS_MBC_SINGLEBYTE_SE(it,enc,s,end) \
|
||||
(ONIGENC_MBC_TO_CODE_SE(it,enc,s,end) <= 0xFF)
|
||||
#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
|
||||
onigenc_ascii_is_code_ctype( \
|
||||
ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD)
|
||||
#define ONIGENC_IS_MBC_ASCII_WORD_SE(it,enc,s,end) \
|
||||
onigenc_ascii_is_code_ctype( \
|
||||
ONIGENC_MBC_TO_CODE_SE(it,enc,s,end),ONIGENC_CTYPE_WORD)
|
||||
#define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
|
||||
|
||||
#define ONIGENC_SCRIPT(enc,s,end) (onigenc_unicode_code_script(ONIGENC_MBC_TO_CODE(enc,s,end)))
|
||||
#define ONIGENC_SCRIPT_SE(it,enc,s,end) (onigenc_unicode_code_script(ONIGENC_MBC_TO_CODE_SE(it,enc,s,end)))
|
||||
|
||||
#define ONIGENC_NAME(enc) ((enc)->name)
|
||||
|
||||
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
|
||||
(enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf)
|
||||
#define ONIGENC_MBC_CASE_FOLD_SE(it,enc,flag,pp,end,buf) \
|
||||
(enc)->mbc_case_fold_se(it,flag,(OnigPosition* )pp,end,buf)
|
||||
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
|
||||
(enc)->is_allowed_reverse_match(s,end)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
|
||||
(enc)->left_adjust_char_head(start, s)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it,enc,start,s) \
|
||||
(enc)->left_adjust_char_head_se(it, start, s)
|
||||
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
|
||||
(enc)->apply_all_case_fold(case_fold_flag,f,arg)
|
||||
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
|
||||
(enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs)
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,n) \
|
||||
onigenc_step_back((enc),(start),(s),(n))
|
||||
#define ONIGENC_STEP_BACK_SE(it,enc,start,s,n) \
|
||||
onigenc_step_back_se((it),(enc),(start),(s),(n))
|
||||
|
||||
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
|
||||
#define ONIGENC_MBC_ENC_LEN_SE(it,enc,p) (enc)->mbc_enc_len_se((it),(p))
|
||||
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
|
||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
|
||||
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
|
||||
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
|
||||
#define ONIGENC_IS_MBC_NEWLINE_SE(it,enc,p,end) (enc)->is_mbc_newline_se((it),(p),(end))
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
|
||||
#define ONIGENC_MBC_TO_CODE_SE(it,enc,p,end) (enc)->mbc_to_code_se((it),(p),(end))
|
||||
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
|
||||
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
|
||||
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
|
||||
@ -342,7 +387,8 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
|
||||
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
|
||||
|
||||
ONIG_EXTERN
|
||||
OnigPosition onigenc_step_back_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s, int n));
|
||||
|
||||
/* encoding API */
|
||||
ONIG_EXTERN
|
||||
@ -356,18 +402,29 @@ void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onigenc_get_right_adjust_char_head_with_prev_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s, OnigPosition* prev));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onigenc_get_prev_char_head_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onigenc_get_left_adjust_char_head_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onigenc_get_right_adjust_char_head_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition p, OnigPosition end));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
|
||||
ONIG_EXTERN
|
||||
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
|
||||
|
||||
ONIG_EXTERN
|
||||
OnigPosition onigenc_str_bytelen_null_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition p));
|
||||
|
||||
|
||||
/* PART: regular expression */
|
||||
@ -406,7 +463,9 @@ typedef unsigned int OnigOptionType;
|
||||
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
|
||||
/* options (newline) */
|
||||
#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit */
|
||||
// options (whole word)
|
||||
#define SE_ONIG_OPTION_WHOLEWORD (ONIG_OPTION_NEWLINE_CRLF << 1)
|
||||
#define ONIG_OPTION_MAXBIT SE_ONIG_OPTION_WHOLEWORD /* limit */
|
||||
|
||||
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
|
||||
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
|
||||
@ -422,6 +481,7 @@ typedef struct {
|
||||
} OnigSyntaxType;
|
||||
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxWildChar;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
|
||||
@ -436,6 +496,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxPython;
|
||||
|
||||
/* predefined syntaxes (see regsyntax.c) */
|
||||
#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
|
||||
#define ONIG_SYNTAX_WILDCHAR (&OnigSyntaxWildChar)
|
||||
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
|
||||
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
|
||||
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
|
||||
@ -775,11 +836,13 @@ int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pat
|
||||
ONIG_EXTERN
|
||||
int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
|
||||
OnigPosition onig_search P_((OnigIterator* it, OnigRegex, OnigPosition str, OnigPosition end, OnigPosition start, OnigPosition range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_search_gpos P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
|
||||
OnigPosition onig_search_gpos P_((OnigIterator* it, OnigRegex, OnigPosition str, OnigPosition end, OnigPosition global_pos, OnigPosition start, OnigPosition range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
|
||||
OnigPosition onig_match P_((OnigIterator* it, OnigRegex, OnigPosition str, OnigPosition end, OnigPosition at, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_match_gpos P_((OnigIterator* it, OnigRegex, OnigPosition str, OnigPosition end, OnigPosition global_pos, OnigPosition at, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigRegion* onig_region_new P_((void));
|
||||
ONIG_EXTERN
|
||||
|
@ -5791,9 +5791,13 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
||||
scan_env.mem_nodes_dynamic = (Node** )NULL;
|
||||
}
|
||||
|
||||
if (IS_WHOLEWORD(reg->options))
|
||||
r = add_opcode(reg, OP_WORD_BEGIN);
|
||||
r = compile_tree(root, reg);
|
||||
if (r == 0) {
|
||||
r = add_opcode(reg, OP_END);
|
||||
if (IS_WHOLEWORD(reg->options))
|
||||
r = add_opcode(reg, OP_WORD_END);
|
||||
r = add_opcode(reg, OP_END);
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
if (scan_env.num_call > 0) {
|
||||
r = unset_addr_list_fix(&uslist, reg);
|
||||
@ -5996,6 +6000,10 @@ onig_end(void)
|
||||
onig_free_node_list();
|
||||
#endif
|
||||
|
||||
#ifdef USE_SHARED_UNICODE_TABLE
|
||||
onig_free_shared_unicode_table();
|
||||
#endif
|
||||
|
||||
onig_inited = 0;
|
||||
|
||||
THREAD_ATOMIC_END;
|
||||
|
@ -61,6 +61,16 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U
|
||||
return p;
|
||||
}
|
||||
|
||||
extern OnigPosition
|
||||
onigenc_get_right_adjust_char_head_se(OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
OnigPosition p = ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s);
|
||||
if (p < s) {
|
||||
p += enclen_se(it, enc, p);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
|
||||
const UChar* start, const UChar* s, const UChar** prev)
|
||||
@ -77,6 +87,22 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
|
||||
return p;
|
||||
}
|
||||
|
||||
extern OnigPosition
|
||||
onigenc_get_right_adjust_char_head_with_prev_se(OnigIterator* it, OnigEncoding enc,
|
||||
OnigPosition start, OnigPosition s, OnigPosition* prev)
|
||||
{
|
||||
OnigPosition p = ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s);
|
||||
|
||||
if (p < s) {
|
||||
if (prev) *prev = p;
|
||||
p += enclen_se(it, enc, p);
|
||||
}
|
||||
else {
|
||||
if (prev) *prev = ONIG_BADPOS; /* Sorry */
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
|
||||
{
|
||||
@ -86,6 +112,15 @@ onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
|
||||
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
|
||||
}
|
||||
|
||||
extern OnigPosition
|
||||
onigenc_get_prev_char_head_se(OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
if (s <= start)
|
||||
return ONIG_BADPOS;
|
||||
|
||||
return ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s - 1);
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
|
||||
{
|
||||
@ -98,6 +133,18 @@ onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
extern OnigPosition
|
||||
onigenc_step_back_se(OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s, int n)
|
||||
{
|
||||
while (ONIG_IS_NOT_BADPOS(s) && n-- > 0) {
|
||||
if (s <= start)
|
||||
return ONIG_BADPOS;
|
||||
|
||||
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s - 1);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
|
||||
{
|
||||
@ -121,6 +168,18 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
|
||||
return n;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_strlen_se(OnigIterator* it, OnigEncoding enc, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
int n = 0;
|
||||
|
||||
while (p < end) {
|
||||
p += ONIGENC_MBC_ENC_LEN_SE(it, enc, p);
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_strlen_null(OnigEncoding enc, const UChar* s)
|
||||
{
|
||||
@ -170,6 +229,30 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
|
||||
}
|
||||
}
|
||||
|
||||
extern OnigPosition
|
||||
onigenc_str_bytelen_null_se(OnigIterator* it, OnigEncoding enc, OnigPosition s)
|
||||
{
|
||||
OnigPosition start = s;
|
||||
OnigPosition p = s;
|
||||
|
||||
while (1) {
|
||||
if (ONIG_CHARAT(p) == '\0') {
|
||||
OnigPosition q;
|
||||
int len = ONIGENC_MBC_MINLEN(enc);
|
||||
|
||||
if (len == 1) return p - start;
|
||||
q = p + 1;
|
||||
while (len > 1) {
|
||||
if (ONIG_CHARAT(q) != '\0') break;
|
||||
q++;
|
||||
len--;
|
||||
}
|
||||
if (len == 1) return p - start;
|
||||
}
|
||||
p += ONIGENC_MBC_ENC_LEN_SE(it, enc, p);
|
||||
}
|
||||
}
|
||||
|
||||
const UChar OnigEncAsciiToLowerCaseTable[] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
@ -362,6 +445,12 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC
|
||||
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
|
||||
}
|
||||
|
||||
extern OnigPosition
|
||||
onigenc_get_left_adjust_char_head_se(OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s)
|
||||
{
|
||||
return ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s);
|
||||
}
|
||||
|
||||
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
|
||||
{ 0x41, 0x61 },
|
||||
{ 0x42, 0x62 },
|
||||
@ -570,6 +659,15 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_is_mbc_newline_0x0a_se(OnigIterator* it, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
if (p < end) {
|
||||
if (ONIG_CHARAT(p) == 0x0a) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* for single byte encodings */
|
||||
extern int
|
||||
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
|
||||
@ -581,6 +679,17 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
/* for single byte encodings */
|
||||
extern int
|
||||
onigenc_ascii_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED, OnigPosition* p,
|
||||
OnigPosition end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(ONIG_CHARAT(*p));
|
||||
|
||||
(*p)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
|
||||
extern int
|
||||
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
@ -599,12 +708,24 @@ onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_mbc_enc_len_se(OnigIterator* it ARG_UNUSED, OnigPosition p ARG_UNUSED)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern OnigCodePoint
|
||||
onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(*p);
|
||||
}
|
||||
|
||||
extern OnigCodePoint
|
||||
onigenc_single_byte_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(ONIG_CHARAT(p));
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
|
||||
{
|
||||
@ -625,6 +746,13 @@ onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
extern OnigPosition
|
||||
onigenc_single_byte_left_adjust_char_head_se(OnigIterator* it ARG_UNUSED, OnigPosition start ARG_UNUSED,
|
||||
OnigPosition s)
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED)
|
||||
@ -666,6 +794,24 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
|
||||
return n;
|
||||
}
|
||||
|
||||
extern OnigCodePoint
|
||||
onigenc_mbn_mbc_to_code_se(OnigIterator* it, OnigEncoding enc, OnigPosition p, OnigPosition end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
len = enclen_se(it, enc, p);
|
||||
n = (OnigCodePoint )(ONIG_CHARAT(p++));
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = ONIG_CHARAT(p++);
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED,
|
||||
@ -691,6 +837,32 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
|
||||
}
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mbn_mbc_case_fold_se(OnigIterator* it, OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigPosition* pp, OnigPosition end ARG_UNUSED,
|
||||
UChar* lower)
|
||||
{
|
||||
int len;
|
||||
OnigPosition p = *pp;
|
||||
const UChar c = ONIG_CHARAT(*pp);
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII_SE(c)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
int i;
|
||||
|
||||
len = enclen_se(it, enc, p);
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = ONIG_CHARAT(p++);
|
||||
}
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted to lower char */
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
extern int
|
||||
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
|
||||
|
@ -73,6 +73,7 @@ typedef struct {
|
||||
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
|
||||
|
||||
#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
|
||||
#define enclen_se(it,enc,p) ONIGENC_MBC_ENC_LEN_SE(it,enc,p)
|
||||
|
||||
/* character types bit flag */
|
||||
#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
|
||||
@ -107,7 +108,7 @@ typedef struct {
|
||||
#define USE_CRNL_AS_LINE_TERMINATOR
|
||||
#define USE_UNICODE_PROPERTIES
|
||||
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
|
||||
#define USE_UNICODE_ALL_LINE_TERMINATORS /* see Unicode.org UTS #18 */
|
||||
|
||||
|
||||
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
|
||||
@ -119,22 +120,29 @@ ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const Oni
|
||||
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a_se P_((OnigIterator* it, OnigPosition p, OnigPosition end));
|
||||
|
||||
|
||||
/* methods for single byte encoding */
|
||||
ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_ascii_mbc_case_fold_se P_((OnigIterator* it, OnigCaseFoldType flag, OnigPosition* p, OnigPosition end, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
|
||||
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len_se P_((OnigIterator* it, OnigPosition p));
|
||||
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code_se P_((OnigIterator* it, OnigPosition p, OnigPosition end));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
|
||||
ONIG_EXTERN OnigPosition onigenc_single_byte_left_adjust_char_head_se P_((OnigIterator* it, OnigPosition start, OnigPosition s));
|
||||
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
|
||||
|
||||
/* methods for multi byte encoding */
|
||||
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition p, OnigPosition end));
|
||||
ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_mbn_mbc_case_fold_se P_((OnigIterator* it, OnigEncoding enc, OnigCaseFoldType flag, OnigPosition* p, OnigPosition end, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
@ -146,11 +154,13 @@ ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint co
|
||||
|
||||
|
||||
/* in enc/unicode.c */
|
||||
ONIG_EXTERN const OnigCodePoint* onigenc_unicode_code_script P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
|
||||
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
|
||||
ONIG_EXTERN int onigenc_unicode_mbc_case_fold_se P_((OnigIterator* it, OnigEncoding enc, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end, UChar* fold));
|
||||
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
|
||||
|
||||
|
1098
src/Onigmo/regexec.c
1098
src/Onigmo/regexec.c
File diff suppressed because it is too large
Load Diff
@ -62,20 +62,20 @@ re_adjust_startpos(regex_t* reg, const char* string, int size,
|
||||
}
|
||||
|
||||
extern int
|
||||
re_match(regex_t* reg, const char* str, int size, int pos,
|
||||
re_match(OnigIterator* it, regex_t* reg, OnigPosition str, OnigPosition size, OnigPosition pos,
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return (int )onig_match(reg, (UChar* )str, (UChar* )(str + size),
|
||||
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
|
||||
return (int )onig_match(it, reg, str, (str + size),
|
||||
(str + pos), regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
|
||||
re_search(OnigIterator* it, regex_t* bufp, OnigPosition str, OnigPosition size, OnigPosition startpos, OnigPosition range,
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return (int )onig_search(bufp, (UChar* )string, (UChar* )(string + size),
|
||||
(UChar* )(string + startpos),
|
||||
(UChar* )(string + startpos + range),
|
||||
return (int )onig_search(it, bufp, str, (str + size),
|
||||
(str + startpos),
|
||||
(str + startpos + range),
|
||||
regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
|
@ -76,6 +76,8 @@
|
||||
#define USE_SHARED_CCLASS_TABLE
|
||||
#define USE_SUNDAY_QUICK_SEARCH
|
||||
|
||||
#define USE_SHARED_UNICODE_TABLE
|
||||
|
||||
#define INIT_MATCH_STACK_SIZE 160
|
||||
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
|
||||
|
||||
@ -336,6 +338,7 @@ typedef unsigned int BitStatusType;
|
||||
#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
|
||||
#define IS_WORD_BOUND_ALL_RANGE(option) ((option) & ONIG_OPTION_WORD_BOUND_ALL_RANGE)
|
||||
#define IS_NEWLINE_CRLF(option) ((option) & ONIG_OPTION_NEWLINE_CRLF)
|
||||
#define IS_WHOLEWORD(option) ((option) & SE_ONIG_OPTION_WHOLEWORD)
|
||||
|
||||
/* OP_SET_OPTION is required for these options.
|
||||
#define IS_DYNAMIC_OPTION(option) \
|
||||
@ -743,19 +746,19 @@ typedef struct {
|
||||
BBuf* mbuf; /* multi-byte info or NULL */
|
||||
} CClassNode;
|
||||
|
||||
typedef intptr_t OnigStackIndex;
|
||||
typedef OnigPosition OnigStackIndex;
|
||||
|
||||
typedef struct _OnigStackType {
|
||||
unsigned int type;
|
||||
union {
|
||||
struct {
|
||||
UChar *pcode; /* byte code position */
|
||||
UChar *pstr; /* string position */
|
||||
UChar *pstr_prev; /* previous char position of pstr */
|
||||
OnigPosition pstr; /* string position */
|
||||
OnigPosition pstr_prev; /* previous char position of pstr */
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
unsigned int state_check;
|
||||
#endif
|
||||
UChar *pkeep; /* keep pattern position */
|
||||
OnigPosition pkeep; /* keep pattern position */
|
||||
} state;
|
||||
struct {
|
||||
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
|
||||
@ -767,20 +770,20 @@ typedef struct _OnigStackType {
|
||||
} repeat_inc;
|
||||
struct {
|
||||
int num; /* memory num */
|
||||
UChar *pstr; /* start/end position */
|
||||
OnigPosition pstr; /* start/end position */
|
||||
/* Following information is set, if this stack type is MEM-START */
|
||||
OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
|
||||
OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
|
||||
} mem;
|
||||
struct {
|
||||
int num; /* null check id */
|
||||
UChar *pstr; /* start position */
|
||||
OnigPosition pstr; /* start position */
|
||||
} null_check;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
struct {
|
||||
UChar *ret_addr; /* byte code position */
|
||||
int num; /* null check id */
|
||||
UChar *pstr; /* string position */
|
||||
OnigPosition pstr; /* string position */
|
||||
} call_frame;
|
||||
#endif
|
||||
} u;
|
||||
@ -791,11 +794,11 @@ typedef struct {
|
||||
size_t stack_n;
|
||||
OnigOptionType options;
|
||||
OnigRegion* region;
|
||||
const UChar* start; /* search start position */
|
||||
const UChar* gpos; /* global position (for \G: BEGIN_POSITION) */
|
||||
OnigPosition start; /* search start position */
|
||||
OnigPosition gpos; /* global position (for \G: BEGIN_POSITION) */
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
OnigPosition best_len; /* for ONIG_OPTION_FIND_LONGEST */
|
||||
UChar* best_s;
|
||||
OnigPosition best_s;
|
||||
#endif
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
void* state_check_buff;
|
||||
|
@ -35,7 +35,6 @@
|
||||
|
||||
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
|
||||
|
||||
|
||||
OnigSyntaxType OnigSyntaxRuby = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
@ -64,7 +63,7 @@ OnigSyntaxType OnigSyntaxRuby = {
|
||||
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
|
||||
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
|
||||
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
|
||||
, ( ONIG_OPTION_ASCII_RANGE | ONIG_OPTION_POSIX_BRACKET_ALL_RANGE |
|
||||
, ( /*ONIG_OPTION_ASCII_RANGE |*/ ONIG_OPTION_POSIX_BRACKET_ALL_RANGE |
|
||||
ONIG_OPTION_WORD_BOUND_ALL_RANGE )
|
||||
,
|
||||
{
|
||||
@ -5341,6 +5340,51 @@ onig_free_shared_cclass_table(void)
|
||||
|
||||
#endif /* USE_SHARED_CCLASS_TABLE */
|
||||
|
||||
#ifdef USE_SHARED_UNICODE_TABLE
|
||||
|
||||
extern st_table* FoldTable; /* fold-1, fold-2, fold-3 */
|
||||
extern st_table* Unfold1Table;
|
||||
extern st_table* Unfold2Table;
|
||||
extern st_table* Unfold3Table;
|
||||
extern int CaseFoldInited;
|
||||
|
||||
static int
|
||||
i_free_shared_unicode_table(st_str_end_key* key, Node* node, void* arg ARG_UNUSED)
|
||||
{
|
||||
if (IS_NOT_NULL(key)) xfree(key);
|
||||
return ST_DELETE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_free_shared_unicode_table(void)
|
||||
{
|
||||
THREAD_ATOMIC_START;
|
||||
if (IS_NOT_NULL(FoldTable)) {
|
||||
onig_st_free_table(FoldTable);
|
||||
FoldTable = NULL;
|
||||
}
|
||||
|
||||
if (IS_NOT_NULL(Unfold1Table)) {
|
||||
onig_st_free_table(Unfold1Table);
|
||||
Unfold1Table = NULL;
|
||||
}
|
||||
|
||||
if (IS_NOT_NULL(Unfold2Table)) {
|
||||
onig_st_free_table(Unfold2Table);
|
||||
Unfold2Table = NULL;
|
||||
}
|
||||
|
||||
if (IS_NOT_NULL(Unfold3Table)) {
|
||||
onig_st_free_table(Unfold3Table);
|
||||
Unfold3Table = NULL;
|
||||
}
|
||||
CaseFoldInited = 0;
|
||||
|
||||
THREAD_ATOMIC_END;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* USE_SHARED_UNICODE_TABLE */
|
||||
|
||||
#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
|
||||
static int
|
||||
|
@ -347,6 +347,10 @@ extern int onig_names_free P_((regex_t* reg));
|
||||
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
|
||||
extern int onig_free_shared_cclass_table P_((void));
|
||||
|
||||
#ifdef USE_SHARED_UNICODE_TABLE
|
||||
extern int onig_free_shared_unicode_table P_((void));
|
||||
#endif /* USE_SHARED_UNICODE_TABLE */
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
#ifdef USE_NAMED_GROUP
|
||||
extern int onig_print_names(FILE*, regex_t*);
|
||||
|
@ -27,7 +27,9 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#define regex_t onig_regex_t
|
||||
#include "regint.h"
|
||||
#undef regex_t
|
||||
#include "onigposix.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
|
@ -48,6 +48,18 @@
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
/*
 * ENC_STRING_LEN_SE(enc, s, len)
 *
 * Stores in `len` the distance from position `s` to the string terminator.
 * When ONIGENC_MBC_MINLEN(enc) is 1 the terminator is the first zero unit
 * and is found with a plain scan; otherwise the work is delegated to
 * onigenc_str_bytelen_null_se().
 *
 * The macro expects a variable named `it` to be in scope at the point of
 * use (it is passed to onigenc_str_bytelen_null_se).
 *
 * Arguments are parenthesized and `s` is evaluated exactly once on either
 * path, so expressions such as `base + offset` are safe to pass.
 */
#define ENC_STRING_LEN_SE(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    const OnigPosition enc_head_ = (s); \
    OnigPosition enc_tail_ = enc_head_; \
    while (ONIG_CHARAT(enc_tail_) != 0) enc_tail_++; \
    (len) = enc_tail_ - enc_head_; \
  } \
  else { \
    (len) = onigenc_str_bytelen_null_se(it, (enc), (s)); \
  } \
} while(0)
|
||||
|
||||
typedef struct {
|
||||
int onig_err;
|
||||
int posix_err;
|
||||
@ -163,12 +175,12 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
regexec(regex_t* reg, const char* str, size_t nmatch,
|
||||
extern OnigPosition
|
||||
regexec(OnigIterator* it, regex_t* reg, OnigPosition str, size_t nmatch,
|
||||
regmatch_t pmatch[], int posix_options)
|
||||
{
|
||||
int r, i, len;
|
||||
UChar* end;
|
||||
OnigPosition r, i, len;
|
||||
OnigPosition end;
|
||||
regmatch_t* pm;
|
||||
OnigOptionType options;
|
||||
|
||||
@ -190,9 +202,9 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
|
||||
pm = pmatch;
|
||||
}
|
||||
|
||||
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
|
||||
end = (UChar* )(str + len);
|
||||
r = (int )onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
|
||||
ENC_STRING_LEN_SE(ONIG_C(reg)->enc, str, len);
|
||||
end = str + len;
|
||||
r = onig_search(it, ONIG_C(reg), str, end, str, end,
|
||||
(OnigRegion* )pm, options);
|
||||
|
||||
if (r >= 0) {
|
||||
@ -207,7 +219,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
|
||||
pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
|
||||
}
|
||||
else {
|
||||
r = onig2posix_error_code(r);
|
||||
r = onig2posix_error_code((int)r);
|
||||
}
|
||||
|
||||
if (pm != pmatch && pm != NULL)
|
||||
|
@ -46,6 +46,22 @@ OnigSyntaxType OnigSyntaxASIS = {
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxWildChar = {
|
||||
ONIG_SYN_OP_VARIABLE_META_CHARACTERS
|
||||
, 0
|
||||
, 0
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )'?' /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )'*' /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxPosixBasic = {
|
||||
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
|
||||
ONIG_SYN_OP_ESC_BRACE_INTERVAL )
|
||||
|
@ -9,12 +9,12 @@
|
||||
#include <string.h>
|
||||
#include "oniguruma.h"
|
||||
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */
|
||||
#define USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
|
||||
static int nfail = 0;
|
||||
|
||||
static void result(int no, int from, int to,
|
||||
int expected_from, int expected_to)
|
||||
static void result(int no, OnigPosition from, OnigPosition to,
|
||||
OnigPosition expected_from, OnigPosition expected_to)
|
||||
{
|
||||
fprintf(stderr, "%3d: ", no);
|
||||
if (from == expected_from && to == expected_to) {
|
||||
@ -22,7 +22,7 @@ static void result(int no, int from, int to,
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Fail: expected: (%d-%d), result: (%d-%d)\n",
|
||||
expected_from, expected_to, from, to);
|
||||
(int)expected_from, (int)expected_to, (int)from, (int)to);
|
||||
|
||||
nfail++;
|
||||
}
|
||||
@ -32,12 +32,13 @@ static int
|
||||
x0(int no, char* pattern_arg, char* str_arg,
|
||||
int start_offset, int expected_from, int expected_to, int backward)
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
OnigPosition r;
|
||||
OnigPosition start, range, end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
UChar *pattern, *str;
|
||||
OnigIterator it = {onig_default_charat, str_arg};
|
||||
|
||||
pattern = (UChar* )pattern_arg;
|
||||
str = (UChar* )str_arg;
|
||||
@ -53,16 +54,16 @@ x0(int no, char* pattern_arg, char* str_arg,
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
end = strlen((char* )str);
|
||||
if (backward) {
|
||||
start = end + start_offset;
|
||||
range = str;
|
||||
range = 0;
|
||||
}
|
||||
else {
|
||||
start = str + start_offset;
|
||||
start = start_offset;
|
||||
range = end;
|
||||
}
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0 || r == ONIG_MISMATCH) {
|
||||
result(no, region->beg[0], region->end[0], expected_from, expected_to);
|
||||
}
|
||||
|
@ -5,24 +5,24 @@
|
||||
#include "oniguruma.h"
|
||||
|
||||
static int
|
||||
search(regex_t* reg, unsigned char* str, unsigned char* end)
|
||||
search(OnigIterator* it, regex_t* reg, OnigPosition str, OnigPosition end)
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range;
|
||||
OnigPosition r;
|
||||
OnigPosition start, range;
|
||||
OnigRegion *region;
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
start = str;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
r = onig_search(it, reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d (%s)\n", r,
|
||||
fprintf(stderr, "match at %d (%s)\n", (int)r,
|
||||
ONIGENC_NAME(onig_get_encoding(reg)));
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, (int)region->beg[i], (int)region->end[i]);
|
||||
}
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
@ -45,13 +45,15 @@ static int
|
||||
exec(OnigEncoding enc, OnigOptionType options,
|
||||
char* apattern, char* astr)
|
||||
{
|
||||
int r;
|
||||
unsigned char *end;
|
||||
OnigPosition r;
|
||||
OnigPosition end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
UChar* pattern = (UChar* )apattern;
|
||||
UChar* str = (UChar* )astr;
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
onig_init();
|
||||
r = onig_new(®, pattern,
|
||||
pattern + onigenc_str_bytelen_null(enc, pattern),
|
||||
options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
|
||||
@ -62,8 +64,8 @@ exec(OnigEncoding enc, OnigOptionType options,
|
||||
return -1;
|
||||
}
|
||||
|
||||
end = str + onigenc_str_bytelen_null(enc, str);
|
||||
r = search(reg, str, end);
|
||||
end = onigenc_str_bytelen_null(enc, str);
|
||||
r = search(&it, reg, 0, end);
|
||||
|
||||
onig_free(reg);
|
||||
onig_end();
|
||||
@ -84,13 +86,14 @@ static int
|
||||
exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
|
||||
OnigOptionType options, char* apattern, char* astr)
|
||||
{
|
||||
int r;
|
||||
unsigned char *end;
|
||||
OnigPosition r;
|
||||
OnigPosition end;
|
||||
regex_t* reg;
|
||||
OnigCompileInfo ci;
|
||||
OnigErrorInfo einfo;
|
||||
UChar* pattern = (UChar* )apattern;
|
||||
UChar* str = (UChar* )astr;
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
ci.num_of_elements = 5;
|
||||
ci.pattern_enc = pattern_enc;
|
||||
@ -109,8 +112,8 @@ exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
|
||||
return -1;
|
||||
}
|
||||
|
||||
end = str + onigenc_str_bytelen_null(str_enc, str);
|
||||
r = search(reg, str, end);
|
||||
end = onigenc_str_bytelen_null(str_enc, str);
|
||||
r = search(&it, reg, 0, end);
|
||||
|
||||
onig_free(reg);
|
||||
onig_end();
|
||||
|
@ -20,18 +20,19 @@ node_callback(int group, OnigPosition beg, OnigPosition end, int level,
|
||||
for (i = 0; i < level * 2; i++)
|
||||
fputc(' ', stderr);
|
||||
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", group, beg, end);
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", group, (int)beg, (int)end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int ex(unsigned char* str, unsigned char* pattern,
|
||||
OnigSyntaxType* syntax)
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
OnigPosition r;
|
||||
OnigPosition start, range, end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
|
||||
@ -48,16 +49,16 @@ extern int ex(unsigned char* str, unsigned char* pattern,
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
end = strlen((char* )str);
|
||||
start = 0;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d\n", r);
|
||||
fprintf(stderr, "match at %d\n", (int)r);
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, (long)region->beg[i], (long)region->end[i]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
|
@ -19,21 +19,22 @@ name_callback(const UChar* name, const UChar* name_end,
|
||||
ref = onig_name_to_backref_number(reg, name, name_end, region);
|
||||
s = (ref == gn ? "*" : "");
|
||||
fprintf(stderr, "%s (%d): ", name, gn);
|
||||
fprintf(stderr, "(%ld-%ld) %s\n", region->beg[gn], region->end[gn], s);
|
||||
fprintf(stderr, "(%ld-%ld) %s\n", (int)region->beg[gn], (int)region->end[gn], s);
|
||||
}
|
||||
return 0; /* 0: continue */
|
||||
}
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
OnigPosition r;
|
||||
OnigPosition start, range, end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
|
||||
static UChar* pattern = (UChar* )"(?<foo>a*)(?<bar>b*)(?<foo>c*)";
|
||||
static UChar* str = (UChar* )"aaabbbbcc";
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
|
||||
@ -48,10 +49,10 @@ extern int main(int argc, char* argv[])
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
end = strlen((char* )str);
|
||||
start = 0;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
fprintf(stderr, "match at %d\n\n", r);
|
||||
r = onig_foreach_name(reg, name_callback, (void* )region);
|
||||
|
@ -2,6 +2,10 @@
|
||||
* posix.c
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#define regex_t onig_regex_t
|
||||
#include "regint.h"
|
||||
#undef regex_t
|
||||
|
||||
#include "onigposix.h"
|
||||
|
||||
typedef unsigned char UChar;
|
||||
@ -11,8 +15,9 @@ static int x(regex_t* reg, unsigned char* pattern, unsigned char* str)
|
||||
int r, i;
|
||||
char buf[200];
|
||||
regmatch_t pmatch[20];
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
r = regexec(reg, (char* )str, reg->re_nsub + 1, pmatch, 0);
|
||||
r = (int)regexec(&it, reg, 0, reg->re_nsub + 1, pmatch, 0);
|
||||
if (r != 0 && r != REG_NOMATCH) {
|
||||
regerror(r, reg, buf, sizeof(buf));
|
||||
fprintf(stderr, "ERROR: %s\n", buf);
|
||||
|
@ -7,14 +7,15 @@
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
OnigPosition r;
|
||||
OnigPosition start, range, end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
|
||||
static UChar* pattern = (UChar* )"a(.*)b|[e-f]+";
|
||||
static UChar* str = (UChar* )"zzzzaffffffffb";
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
|
||||
@ -27,16 +28,16 @@ extern int main(int argc, char* argv[])
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
end = strlen((char* )str);
|
||||
start = 0;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d\n", r);
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, (int)region->beg[i], (int)region->end[i]);
|
||||
}
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
|
@ -9,14 +9,15 @@ extern int main(int argc, char* argv[])
|
||||
{
|
||||
static OnigSyntaxType SQLSyntax;
|
||||
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
OnigPosition r;
|
||||
OnigPosition start, range, end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
|
||||
static UChar* pattern = (UChar* )"\\_%\\\\__zz";
|
||||
static UChar* str = (UChar* )"a_abcabcabc\\ppzz";
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS);
|
||||
onig_set_syntax_op2 (&SQLSyntax, 0);
|
||||
@ -44,16 +45,16 @@ extern int main(int argc, char* argv[])
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
end = strlen((char* )str);
|
||||
start = 0;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d\n", r);
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, (int)region->beg[i], (int)region->end[i]);
|
||||
}
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
|
@ -8,13 +8,14 @@
|
||||
extern int exec(OnigSyntaxType* syntax,
|
||||
char* apattern, char* astr)
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
OnigPosition r;
|
||||
OnigPosition start, range, end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
UChar* pattern = (UChar* )apattern;
|
||||
UChar* str = (UChar* )astr;
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
|
||||
@ -27,16 +28,16 @@ extern int exec(OnigSyntaxType* syntax,
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
end = strlen((char* )str);
|
||||
start = 0;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d\n", r);
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, (int)region->beg[i], (int)region->end[i]);
|
||||
}
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
|
@ -33,7 +33,7 @@ static OnigRegion* region;
|
||||
|
||||
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
{
|
||||
int r;
|
||||
OnigPosition r;
|
||||
|
||||
#ifdef POSIX_TEST
|
||||
regex_t reg;
|
||||
@ -89,6 +89,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT;
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
/* ONIG_OPTION_OFF(syn.options, ONIG_OPTION_ASCII_RANGE); */
|
||||
|
||||
@ -102,8 +103,8 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
return ;
|
||||
}
|
||||
|
||||
r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
|
||||
(UChar* )str, (UChar* )(str + SLEN(str)),
|
||||
r = onig_search(&it, reg, 0, SLEN(str),
|
||||
0, SLEN(str),
|
||||
region, ONIG_OPTION_NONE);
|
||||
if (r < ONIG_MISMATCH) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
|
@ -57,7 +57,7 @@ static void uconv(char* from, char* to, int len)
|
||||
|
||||
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
{
|
||||
int r;
|
||||
OnigPosition r;
|
||||
char cpat[4000], cstr[4000];
|
||||
|
||||
#ifdef POSIX_TEST
|
||||
@ -118,6 +118,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
OnigCompileInfo ci;
|
||||
OnigErrorInfo einfo;
|
||||
OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT;
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
/* ONIG_OPTION_OFF(syn.options, ONIG_OPTION_ASCII_RANGE); */
|
||||
|
||||
@ -148,8 +149,8 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
return ;
|
||||
}
|
||||
|
||||
r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)),
|
||||
(UChar* )str, (UChar* )(str + ulen(str)),
|
||||
r = onig_search(&it, reg, 0, ulen(str),
|
||||
0, ulen(str),
|
||||
region, ONIG_OPTION_NONE);
|
||||
if (r < ONIG_MISMATCH) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
|
@ -33,7 +33,7 @@ static OnigRegion* region;
|
||||
|
||||
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
{
|
||||
int r;
|
||||
OnigPosition r;
|
||||
|
||||
#ifdef POSIX_TEST
|
||||
regex_t reg;
|
||||
@ -89,6 +89,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT;
|
||||
OnigIterator it = {onig_default_charat, str};
|
||||
|
||||
/* ONIG_OPTION_OFF(syn.options, ONIG_OPTION_ASCII_RANGE); */
|
||||
|
||||
@ -102,8 +103,8 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
|
||||
return ;
|
||||
}
|
||||
|
||||
r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
|
||||
(UChar* )str, (UChar* )(str + SLEN(str)),
|
||||
r = onig_search(&it, reg, 0, SLEN(str),
|
||||
0, SLEN(str),
|
||||
region, ONIG_OPTION_NONE);
|
||||
if (r < ONIG_MISMATCH) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
|
@ -0,0 +1,7 @@
|
||||
# Components
|
||||
|
||||
Below is a list of (some) WindTerm components in alphabetical order, along with a brief description of each.
|
||||
|
||||
## Onigmo
|
||||
|
||||
An improved version of Onigmo, based on release 5.13.5. In particular, the addition of an iterator makes it possible to match against a gap buffer or non-adjacent memory blocks. Please refer to the sample files for usage examples.
|
Loading…
Reference in New Issue
Block a user