Add an iterator for searching the gap buffer, plus wildcard matching and whole-word matching.

This commit is contained in:
KingToolbox 2020-07-17 02:57:05 +08:00
parent 432b7676da
commit 855a3e54d3
58 changed files with 2656 additions and 643 deletions

View File

@ -32,20 +32,25 @@
/*
 * Encoding table for "US-ASCII" (single-byte: max and min byte length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingASCII = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"US-ASCII", /* name */
1, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
onigenc_ascii_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -54,12 +54,24 @@ big5_mbc_enc_len(const UChar* p)
return EncLen_BIG5[*p];
}
/*
 * Iterator-based counterpart of big5_mbc_enc_len(): returns the EncLen_BIG5
 * entry for the byte that ONIG_CHARAT yields at position `p`.
 * NOTE(review): `it` is not referenced by name; presumably ONIG_CHARAT
 * expands to an access through it -- confirm in the macro definition.
 */
static int
big5_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
return EncLen_BIG5[ONIG_CHARAT(p)];
}
static OnigCodePoint
big5_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
}
/*
 * Iterator-based counterpart of big5_mbc_to_code(): forwards `it`, `p` and
 * `end` to onigenc_mbn_mbc_to_code_se() with ONIG_ENCODING_BIG5 and returns
 * its result unchanged.
 */
static OnigCodePoint
big5_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
return onigenc_mbn_mbc_to_code_se(it, ONIG_ENCODING_BIG5, p, end);
}
static int
big5_code_to_mbc(OnigCodePoint code, UChar *buf)
{
@ -74,6 +86,14 @@ big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
pp, end, lower);
}
/*
 * Iterator-based counterpart of big5_mbc_case_fold(): delegates to
 * onigenc_mbn_mbc_case_fold_se() with ONIG_ENCODING_BIG5, passing `flag`,
 * `pp`, `end` and `lower` through, and returns that call's result.
 */
static int
big5_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold_se(it, ONIG_ENCODING_BIG5, flag,
pp, end, lower);
}
#if 0
static int
big5_is_mbc_ambiguous(OnigCaseFoldType flag,
@ -134,6 +154,29 @@ big5_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )(p + ((s - p) & ~1));
}
/*
 * Iterator-based counterpart of big5_left_adjust_char_head().
 *
 * Given a position `s` inside the range that begins at `start`, returns a
 * position at or before `s`.  If the byte at `s` can be a Big5 trail byte,
 * the scan walks backwards over bytes that can be Big5 first bytes (never
 * going below `start`), then re-synchronises forward from there using the
 * encoded length of the character found.
 *
 * Returns `s` unchanged when s <= start.
 */
static OnigPosition
big5_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  OnigPosition p;
  int len;

  if (s <= start) return s;
  p = s;

  if (BIG5_ISMB_TRAIL(ONIG_CHARAT(p))) {
    while (p > start) {
      /* Step back *before* the macro call: ONIG_CHARAT and BIG5_ISMB_FIRST
         are function-like macros, and a side effect inside their argument
         would be repeated if either evaluates it more than once. */
      --p;
      if (! BIG5_ISMB_FIRST(ONIG_CHARAT(p))) {
        p++;
        break;
      }
    }
  }

  len = enclen_se(it, ONIG_ENCODING_BIG5, p);
  if (p + len > s) return p;  /* character starting at p covers s */
  p += len;
  /* Skip forward by an even number of bytes towards s. */
  return (p + ((s - p) & ~1));
}
static int
big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
@ -144,20 +187,25 @@ big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
/*
 * Encoding table for "Big5" (max enc length 2, min enc length 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingBIG5 = {
big5_mbc_enc_len,
big5_mbc_enc_len_se,
"Big5", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
big5_mbc_to_code,
big5_mbc_to_code_se,
onigenc_mb2_code_to_mbclen,
big5_code_to_mbc,
big5_mbc_case_fold,
big5_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
big5_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_left_adjust_char_head_se,
big5_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -115,6 +115,17 @@ cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
/*
 * Case-fold one CP1251 byte read through the iterator.
 *
 * Reads the byte at position *pp, stores ENC_CP1251_TO_LOWER_CASE of it in
 * lower[0], advances *pp by one and returns 1 (bytes written).  `flag` and
 * `end` are not consulted.
 */
static int
cp1251_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
                        OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);

  lower[0] = ENC_CP1251_TO_LOWER_CASE(ch);
  *pp += 1;
  return 1;
}
static int
cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
@ -182,20 +193,25 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding table for "CP1251" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingCP1251 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"CP1251", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
cp1251_mbc_case_fold,
cp1251_mbc_case_fold_se,
cp1251_apply_all_case_fold,
cp1251_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
cp1251_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -148,6 +148,12 @@ mbc_enc_len(const UChar* p)
return EncLen_EUCJP[*p];
}
/*
 * Iterator-based counterpart of mbc_enc_len(): returns the EncLen_EUCJP entry
 * for the byte that ONIG_CHARAT yields at position `p`.
 * NOTE(review): `it` is not referenced by name; presumably ONIG_CHARAT
 * expands to an access through it -- confirm in the macro definition.
 */
static int
mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
return EncLen_EUCJP[ONIG_CHARAT(p)];
}
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{
@ -166,6 +172,24 @@ mbc_to_code(const UChar* p, const UChar* end)
return n;
}
/*
 * Iterator-based counterpart of mbc_to_code(): builds the code point of the
 * EUC-JP character that starts at position `p`.
 *
 * The character's byte length comes from mbc_enc_len_se().  The first byte is
 * always read; each following byte is shifted in as the next lower 8 bits.
 * Bytes at or beyond `end` are not read: the loop stops early and the value
 * accumulated so far is returned.
 */
static OnigCodePoint
mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
  int c, i, len;
  OnigCodePoint n;

  len = mbc_enc_len_se(it, p);

  /* Advance p in a separate statement: ONIG_CHARAT is a function-like macro,
     so a `p++` inside its argument would be repeated if the macro evaluates
     the argument more than once. */
  n = (OnigCodePoint )ONIG_CHARAT(p);
  p++;
  if (len == 1) return n;

  for (i = 1; i < len; i++) {
    if (p >= end) break;
    c = ONIG_CHARAT(p);
    p++;
    n <<= 8;
    n += c;
  }
  return n;
}
static int
code_to_mbclen(OnigCodePoint code)
{
@ -310,6 +334,28 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
}
}
/*
 * Iterator-based counterpart of mbc_case_fold() for EUC-JP.
 *
 * Reads the byte at *pp.  When ONIGENC_IS_MBC_ASCII_SE accepts it, the ASCII
 * lower-case form is written to lower[0], *pp advances by one and 1 is
 * returned.  Otherwise the character at *pp is decoded with mbc_to_code_se(),
 * mapped through get_lower_case(), re-encoded into `lower` with code_to_mbc(),
 * and both the advance of *pp and the return value are the number of bytes
 * code_to_mbc() reported.
 *
 * NOTE(review): `end` carries ARG_UNUSED but is passed to mbc_to_code_se().
 * NOTE(review): ONIGENC_IS_MBC_ASCII_SE receives the fetched byte, not a
 * position -- confirm that matches the macro's definition.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar first = ONIG_CHARAT(*pp);
  OnigCodePoint folded;
  int nbytes;

  if (ONIGENC_IS_MBC_ASCII_SE(first)) {
    lower[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(first);
    *pp += 1;
    return 1;
  }

  folded = get_lower_case(mbc_to_code_se(it, *pp, end));
  nbytes = code_to_mbc(folded, lower);
  *pp += nbytes;
  return nbytes; /* byte length of the lower-cased character */
}
static UChar*
left_adjust_char_head(const UChar* start, const UChar* s)
{
@ -329,6 +375,25 @@ left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )(p + ((s - p) & ~1));
}
/*
 * Iterator-based counterpart of left_adjust_char_head() for EUC-JP.
 *
 * In this encoding, multibyte trail bytes don't mix with single bytes, so the
 * scan steps back from `s` until eucjp_islead() accepts the byte or `start`
 * is reached, then re-synchronises forward using mbc_enc_len_se().
 * Returns `s` unchanged when s <= start.
 */
static OnigPosition
left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  OnigPosition head;
  int width;

  if (s <= start) return s;

  head = s;
  for (;;) {
    if (eucjp_islead(ONIG_CHARAT(head))) break;
    if (head <= start) break;
    head--;
  }

  width = mbc_enc_len_se(it, head);
  if (head + width > s) return head;

  head += width;
  return head + ((s - head) & ~1);
}
static int
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
@ -512,20 +577,25 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
/*
 * Encoding table for "EUC-JP" (max enc length 3, min enc length 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingEUC_JP = {
mbc_enc_len,
mbc_enc_len_se,
"EUC-JP", /* name */
3, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
mbc_to_code,
mbc_to_code_se,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
left_adjust_char_head_se,
is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -54,12 +54,24 @@ euckr_mbc_enc_len(const UChar* p)
return EncLen_EUCKR[*p];
}
/*
 * Iterator-based counterpart of euckr_mbc_enc_len(): returns the EncLen_EUCKR
 * entry for the byte that ONIG_CHARAT yields at position `p`.
 * NOTE(review): `it` is not referenced by name; presumably ONIG_CHARAT
 * expands to an access through it -- confirm in the macro definition.
 */
static int
euckr_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
return EncLen_EUCKR[ONIG_CHARAT(p)];
}
static OnigCodePoint
euckr_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
}
/*
 * Iterator-based counterpart of euckr_mbc_to_code(): forwards `it`, `p` and
 * `end` to onigenc_mbn_mbc_to_code_se() with ONIG_ENCODING_EUC_KR and returns
 * its result unchanged.
 */
static OnigCodePoint
euckr_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
return onigenc_mbn_mbc_to_code_se(it, ONIG_ENCODING_EUC_KR, p, end);
}
static int
euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
{
@ -74,6 +86,14 @@ euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
pp, end, lower);
}
/*
 * Iterator-based counterpart of euckr_mbc_case_fold(): delegates to
 * onigenc_mbn_mbc_case_fold_se() with ONIG_ENCODING_EUC_KR, passing `flag`,
 * `pp`, `end` and `lower` through, and returns that call's result.
 */
static int
euckr_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold_se(it, ONIG_ENCODING_EUC_KR, flag,
pp, end, lower);
}
#if 0
static int
euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
@ -110,6 +130,25 @@ euckr_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )(p + ((s - p) & ~1));
}
/*
 * Iterator-based counterpart of euckr_left_adjust_char_head().
 *
 * Assumes that, in this encoding, multibyte trail bytes don't mix with single
 * bytes: the scan steps back from `s` until euckr_islead() accepts the byte
 * or `start` is reached, then re-synchronises forward using enclen_se().
 * Returns `s` unchanged when s <= start.
 */
static OnigPosition
euckr_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  OnigPosition head;
  int width;

  if (s <= start) return s;

  head = s;
  for (;;) {
    if (euckr_islead(ONIG_CHARAT(head))) break;
    if (head <= start) break;
    head--;
  }

  width = enclen_se(it, ONIG_ENCODING_EUC_KR, head);
  if (head + width > s) return head;

  head += width;
  return head + ((s - head) & ~1);
}
static int
euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
@ -120,20 +159,25 @@ euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
/*
 * Encoding table for "EUC-KR" (max enc length 2, min enc length 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingEUC_KR = {
euckr_mbc_enc_len,
euckr_mbc_enc_len_se,
"EUC-KR", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
euckr_mbc_to_code,
euckr_mbc_to_code_se,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
euckr_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_left_adjust_char_head_se,
euckr_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};
@ -141,20 +185,25 @@ OnigEncodingType OnigEncodingEUC_KR = {
/*
 * Encoding table for "EUC-CN": identical to OnigEncodingEUC_KR except for the
 * name (it reuses every euckr_* callback, including the `_se` variants).
 */
OnigEncodingType OnigEncodingEUC_CN = {
euckr_mbc_enc_len,
euckr_mbc_enc_len_se,
"EUC-CN", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
euckr_mbc_to_code,
euckr_mbc_to_code_se,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
euckr_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_left_adjust_char_head_se,
euckr_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -54,12 +54,24 @@ euctw_mbc_enc_len(const UChar* p)
return EncLen_EUCTW[*p];
}
/*
 * Iterator-based counterpart of euctw_mbc_enc_len(): returns the EncLen_EUCTW
 * entry for the byte that ONIG_CHARAT yields at position `p`.
 * NOTE(review): `it` is not referenced by name; presumably ONIG_CHARAT
 * expands to an access through it -- confirm in the macro definition.
 */
static int
euctw_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
return EncLen_EUCTW[ONIG_CHARAT(p)];
}
static OnigCodePoint
euctw_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
}
/*
 * Iterator-based counterpart of euctw_mbc_to_code(): forwards `it`, `p` and
 * `end` to onigenc_mbn_mbc_to_code_se() with ONIG_ENCODING_EUC_TW and returns
 * its result unchanged.
 */
static OnigCodePoint
euctw_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
return onigenc_mbn_mbc_to_code_se(it, ONIG_ENCODING_EUC_TW, p, end);
}
static int
euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
{
@ -74,6 +86,14 @@ euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
pp, end, lower);
}
/*
 * Iterator-based counterpart of euctw_mbc_case_fold(): delegates to
 * onigenc_mbn_mbc_case_fold_se() with ONIG_ENCODING_EUC_TW, passing `flag`,
 * `pp`, `end` and `lower` through, and returns that call's result.
 */
static int
euctw_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold_se(it, ONIG_ENCODING_EUC_TW, flag,
pp, end, lower);
}
static int
euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
@ -101,6 +121,25 @@ euctw_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )(p + ((s - p) & ~1));
}
/*
 * Iterator-based counterpart of euctw_left_adjust_char_head().
 *
 * Assumes that, in this encoding, multibyte trail bytes don't mix with single
 * bytes: the scan steps back from `s` until euctw_islead() accepts the byte
 * or `start` is reached, then re-synchronises forward using enclen_se().
 * Returns `s` unchanged when s <= start.
 */
static OnigPosition
euctw_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  OnigPosition head;
  int width;

  if (s <= start) return s;

  head = s;
  for (;;) {
    if (euctw_islead(ONIG_CHARAT(head))) break;
    if (head <= start) break;
    head--;
  }

  width = enclen_se(it, ONIG_ENCODING_EUC_TW, head);
  if (head + width > s) return head;

  head += width;
  return head + ((s - head) & ~1);
}
static int
euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
@ -111,20 +150,25 @@ euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
/*
 * Encoding table for "EUC-TW" (max enc length 4, min enc length 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingEUC_TW = {
euctw_mbc_enc_len,
euctw_mbc_enc_len_se,
"EUC-TW", /* name */
4, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
euctw_mbc_to_code,
euctw_mbc_to_code_se,
onigenc_mb4_code_to_mbclen,
euctw_code_to_mbc,
euctw_mbc_case_fold,
euctw_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euctw_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euctw_left_adjust_char_head,
euctw_left_adjust_char_head_se,
euctw_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -75,12 +75,34 @@ gb18030_mbc_enc_len(const UChar* p)
return 2;
}
/*
 * Iterator-based counterpart of gb18030_mbc_enc_len().
 *
 * Classifies the byte at `p` through GB18030_MAP.  Anything other than CM
 * yields length 1.  For a CM byte the class of the following byte decides:
 * C4 -> 4, C1 -> 1 (illegal sequence), anything else -> 2.
 * The byte at p+1 is read without a bounds check.
 */
static int
gb18030_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
  const UChar lead = ONIG_CHARAT(p);

  if (GB18030_MAP[lead] == CM) {
    const UChar second = ONIG_CHARAT(p+1);

    switch (GB18030_MAP[second]) {
    case C4:
      return 4;
    case C1:
      return 1; /* illegal sequence */
    default:
      return 2;
    }
  }

  return 1;
}
static OnigCodePoint
gb18030_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end);
}
/*
 * Iterator-based counterpart of gb18030_mbc_to_code(): forwards `it`, `p` and
 * `end` to onigenc_mbn_mbc_to_code_se() with ONIG_ENCODING_GB18030 and
 * returns its result unchanged.
 */
static OnigCodePoint
gb18030_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
return onigenc_mbn_mbc_to_code_se(it, ONIG_ENCODING_GB18030, p, end);
}
static int
gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
{
@ -95,6 +117,14 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
pp, end, lower);
}
/*
 * Iterator-based counterpart of gb18030_mbc_case_fold(): delegates to
 * onigenc_mbn_mbc_case_fold_se() with ONIG_ENCODING_GB18030, passing `flag`,
 * `pp`, `end` and `lower` through, and returns that call's result.
 */
static int
gb18030_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold_se(it, ONIG_ENCODING_GB18030, flag,
pp, end, lower);
}
#if 0
static int
gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
@ -469,6 +499,333 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )s; /* never come here. (escape warning) */
}
/*
 * Iterator-based counterpart of gb18030_left_adjust_char_head().
 *
 * Walks backwards from `s` down to `start` (inclusive), one byte per step.
 * Each byte is read with ONIG_CHARAT and classified through GB18030_MAP as
 * C1, C2, C4 or CM; `state` records the run of classes seen so far (the
 * comment beside each case spells out the byte pattern it stands for).
 * As soon as a byte settles the question, the function returns `s` moved
 * back by 0, 1, 2 or 3 bytes.  If the scan reaches `start` without settling,
 * the switch after the loop maps the final state to the same kind of offset.
 *
 * NOTE(review): presumably the returned position is the head of the
 * character containing `s` -- confirm against the pointer-based original.
 * NOTE(review): the loop runs while `p >= start` and decrements `p`, so it
 * relies on OnigPosition being able to represent start - 1 (i.e. a signed
 * type) -- confirm.
 */
static OnigPosition
gb18030_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
OnigPosition p;
UChar c;
enum state state = S_START;
DEBUG_GB18030(("----------------\n"));
/* Backward scan: one state transition per byte, early return once decided. */
for (p = s; p >= start; p--) {
c = ONIG_CHARAT(p);
DEBUG_GB18030(("state %d --(%02x)-->\n", state, c));
switch (state) {
case S_START:
switch (GB18030_MAP[c]) {
case C1:
return s;
case C2:
state = S_one_C2; /* C2 */
break;
case C4:
state = S_one_C4; /* C4 */
break;
case CM:
state = S_one_CM; /* CM */
break;
}
break;
case S_one_C2: /* C2 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return s;
case CM:
state = S_odd_CM_one_CX; /* CM C2 */
break;
}
break;
case S_one_C4: /* C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return s;
case CM:
state = S_one_CMC4;
break;
}
break;
case S_one_CM: /* CM */
switch (GB18030_MAP[c]) {
case C1:
case C2:
return s;
case C4:
state = S_odd_C4CM;
break;
case CM:
state = S_odd_CM_one_CX; /* CM CM */
break;
}
break;
case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 1);
case CM:
state = S_even_CM_one_CX;
break;
}
break;
case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return s;
case CM:
state = S_odd_CM_one_CX;
break;
}
break;
case S_one_CMC4: /* CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
return (s - 1);
case C4:
state = S_one_C4_odd_CMC4; /* C4 CM C4 */
break;
case CM:
state = S_even_CM_one_CX; /* CM CM C4 */
break;
}
break;
case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
return (s - 1);
case C4:
state = S_one_C4_odd_CMC4;
break;
case CM:
state = S_odd_CM_odd_CMC4;
break;
}
break;
case S_one_C4_odd_CMC4: /* C4 CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 1);
case CM:
state = S_even_CMC4; /* CM C4 CM C4 */
break;
}
break;
case S_even_CMC4: /* CM C4 CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
return (s - 3);
case C4:
state = S_one_C4_even_CMC4;
break;
case CM:
state = S_odd_CM_even_CMC4;
break;
}
break;
case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 3);
case CM:
state = S_odd_CMC4;
break;
}
break;
case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 3);
case CM:
state = S_even_CM_odd_CMC4;
break;
}
break;
case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 1);
case CM:
state = S_odd_CM_odd_CMC4;
break;
}
break;
case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 1);
case CM:
state = S_even_CM_even_CMC4;
break;
}
break;
case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 3);
case CM:
state = S_odd_CM_even_CMC4;
break;
}
break;
case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return s;
case CM:
state = S_one_CM_odd_C4CM; /* CM C4 CM */
break;
}
break;
case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
switch (GB18030_MAP[c]) {
case C1:
case C2:
return (s - 2); /* |CM C4 CM */
case C4:
state = S_even_C4CM;
break;
case CM:
state = S_even_CM_odd_C4CM;
break;
}
break;
case S_even_C4CM: /* C4 CM C4 CM */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 2); /* C4|CM C4 CM */
case CM:
state = S_one_CM_even_C4CM;
break;
}
break;
case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
switch (GB18030_MAP[c]) {
case C1:
case C2:
return (s - 0); /*|CM C4 CM C4|CM */
case C4:
state = S_odd_C4CM;
break;
case CM:
state = S_even_CM_even_C4CM;
break;
}
break;
case S_even_CM_odd_C4CM: /* CM CM C4 CM */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 0); /* |CM CM|C4|CM */
case CM:
state = S_odd_CM_odd_C4CM;
break;
}
break;
case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 2); /* |CM CM|CM C4 CM */
case CM:
state = S_even_CM_odd_C4CM;
break;
}
break;
case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 2); /* |CM CM|C4|CM C4 CM */
case CM:
state = S_odd_CM_even_C4CM;
break;
}
break;
case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
switch (GB18030_MAP[c]) {
case C1:
case C2:
case C4:
return (s - 0); /* |CM CM|CM C4 CM C4|CM */
case CM:
state = S_even_CM_even_C4CM;
break;
}
break;
}
}
DEBUG_GB18030(("state %d\n", state));
/* Scan hit `start` without an early return: decide from the final state. */
switch (state) {
case S_START: return (s - 0);
case S_one_C2: return (s - 0);
case S_one_C4: return (s - 0);
case S_one_CM: return (s - 0);
case S_odd_CM_one_CX: return (s - 1);
case S_even_CM_one_CX: return (s - 0);
case S_one_CMC4: return (s - 1);
case S_odd_CMC4: return (s - 1);
case S_one_C4_odd_CMC4: return (s - 1);
case S_even_CMC4: return (s - 3);
case S_one_C4_even_CMC4: return (s - 3);
case S_odd_CM_odd_CMC4: return (s - 3);
case S_even_CM_odd_CMC4: return (s - 1);
case S_odd_CM_even_CMC4: return (s - 1);
case S_even_CM_even_CMC4: return (s - 3);
case S_odd_C4CM: return (s - 0);
case S_one_CM_odd_C4CM: return (s - 2);
case S_even_C4CM: return (s - 2);
case S_one_CM_even_C4CM: return (s - 0);
case S_even_CM_odd_C4CM: return (s - 0);
case S_odd_CM_odd_C4CM: return (s - 2);
case S_even_CM_even_C4CM: return (s - 2);
case S_odd_CM_even_C4CM: return (s - 0);
}
return s; /* never come here. (escape warning) */
}
static int
gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
@ -477,20 +834,25 @@ gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
/*
 * Encoding table for "GB18030" (max enc length 4, min enc length 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingGB18030 = {
gb18030_mbc_enc_len,
gb18030_mbc_enc_len_se,
"GB18030", /* name */
4, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
gb18030_mbc_to_code,
gb18030_mbc_to_code_se,
onigenc_mb4_code_to_mbclen,
gb18030_code_to_mbc,
gb18030_mbc_case_fold,
gb18030_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
gb18030_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
gb18030_left_adjust_char_head,
gb18030_left_adjust_char_head_se,
gb18030_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -216,6 +216,24 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
return 1;
}
/*
 * Case-fold one ISO-8859-1 byte read through the iterator.
 *
 * Reads the byte at *pp, writes its folded form to `lower`, advances *pp by
 * one and returns the number of bytes written: 2 ("ss") when the byte is 0xdf
 * and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, otherwise 1.
 * `end` is not consulted.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp,
                 OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;
  int written;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
@ -254,20 +272,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
/*
 * Encoding table for "ISO-8859-1" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_1 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-1", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1;
}
/*
 * Case-fold one ISO-8859-10 byte read through the iterator.
 *
 * Reads the byte at *pp, writes its folded form to `lower`, advances *pp by
 * one and returns the number of bytes written: 2 ("ss") when the byte is 0xdf
 * and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, otherwise 1.
 * `end` is not consulted.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;
  int written;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_10_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -221,20 +239,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding table for "ISO-8859-10" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_10 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-10", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -78,20 +78,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
/*
 * Encoding table for "ISO-8859-11" (single-byte: max and min enc length are 1).
 * Case folding uses the shared onigenc_ascii_* callbacks.
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_11 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-11", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1;
}
/*
 * Case-fold one ISO-8859-13 byte read through the iterator.
 *
 * Reads the byte at *pp, writes its folded form to `lower`, advances *pp by
 * one and returns the number of bytes written: 2 ("ss") when the byte is 0xdf
 * and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, otherwise 1.
 * `end` is not consulted.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;
  int written;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_13_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -210,20 +228,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding table for "ISO-8859-13" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_13 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-13", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
/*
 * Case-fold one ISO-8859-14 byte read through the iterator.
 *
 * Reads the byte at *pp, writes its folded form to `lower`, advances *pp by
 * one and returns the number of bytes written: 2 ("ss") when the byte is 0xdf
 * and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, otherwise 1.
 * `end` is not consulted.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;
  int written;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_14_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written; /* byte length of the lower-cased character */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
@ -223,20 +241,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding table for "ISO-8859-14" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_14 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-14", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
/*
 * Case-fold one ISO-8859-15 byte read through the iterator.
 *
 * Reads the byte at *pp, writes its folded form to `lower`, advances *pp by
 * one and returns the number of bytes written: 2 ("ss") when the byte is 0xdf
 * and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, otherwise 1.
 * `end` is not consulted.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;
  int written;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_15_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written; /* byte length of the lower-cased character */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
@ -217,20 +235,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding table for "ISO-8859-15" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_15 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-15", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
/*
 * Case-fold one ISO-8859-16 byte read through the iterator.
 *
 * Reads the byte at *pp, writes its folded form to `lower`, advances *pp by
 * one and returns the number of bytes written: 2 ("ss") when the byte is 0xdf
 * and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, otherwise 1.
 * `end` is not consulted.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;
  int written;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_16_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written; /* byte length of the lower-cased character */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -219,20 +237,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding table for "ISO-8859-16" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_16 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-16", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
/*
 * Case-fold one ISO-8859-2 byte read through the iterator.
 *
 * Reads the byte at *pp, writes its folded form to `lower`, advances *pp by
 * one and returns the number of bytes written: 2 ("ss") when the byte is 0xdf
 * and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, otherwise 1.
 * `end` is not consulted.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;
  int written;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_2_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written; /* byte length of the lower-cased character */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -217,20 +235,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
/*
 * Encoding table for "ISO-8859-2" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_2 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-2", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
return 1;
}
/*
 * Case-fold one ISO-8859-3 byte read through the iterator.
 *
 * Reads the byte at *pp, writes its folded form to `lower`, advances *pp by
 * one and returns the number of bytes written: 2 ("ss") when the byte is 0xdf
 * and INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, otherwise 1.
 * `end` is not consulted.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp,
                 OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  const int multi_char = (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0;
  int written;

  if (ch == 0xdf && multi_char) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_3_TO_LOWER_CASE(ch);
    written = 1;
  }

  *pp += 1;
  return written;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -217,20 +235,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding table for "ISO-8859-3" (single-byte: max and min enc length are 1).
 * Every `_se` entry directly follows the callback it is named after.
 * NOTE(review): slot order must match the OnigEncodingType declaration, which
 * is not visible here -- confirm before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_3 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-3", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
/*
 * Iterator-based case fold for ISO-8859-4.
 *
 * Reads one byte at *pp, stores its folded form in `lower`, moves *pp one
 * position forward and returns the byte length of the folded output
 * (2 for byte 0xdf expanded to "ss" under multi-char folding, else 1).
 * `it` is not named in the body; presumably ONIG_CHARAT reads through it
 * (TODO confirm against the macro definition).
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  int written;

  if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0 && ch == 0xdf) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_4_TO_LOWER_CASE(ch);
    written = 1;
  }

  /* a single input byte is consumed regardless of the output length */
  *pp += 1;
  return written; /* byte length of the folded output */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -219,20 +237,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for ISO-8859-4.  Pointer-based callbacks are listed
 * next to their iterator-based `_se` counterparts (for example
 * mbc_case_fold / mbc_case_fold_se).
 */
OnigEncodingType OnigEncodingISO_8859_4 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-4", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -114,6 +114,17 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
/*
 * Iterator-based case fold for ISO-8859-5: maps the byte at *pp through
 * ENC_ISO_8859_5_TO_LOWER_CASE into lower[0], advances *pp by one and
 * always returns 1.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);

  lower[0] = ENC_ISO_8859_5_TO_LOWER_CASE(ch);
  *pp += 1;
  return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -208,20 +219,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for ISO-8859-5.  Pointer-based callbacks are listed
 * next to their iterator-based `_se` counterparts (for example
 * mbc_case_fold / mbc_case_fold_se).
 */
OnigEncodingType OnigEncodingISO_8859_5 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-5", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -78,20 +78,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
/*
 * Encoding descriptor for ISO-8859-6.  Case folding uses the shared ASCII
 * helpers (onigenc_ascii_*); only is_code_ctype is local to this file.
 * Entries ending in `_se` are the additional variants added next to the
 * pointer-based callbacks.
 */
OnigEncodingType OnigEncodingISO_8859_6 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-6", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -114,6 +114,17 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
/*
 * Iterator-based case fold for ISO-8859-7: maps the byte at *pp through
 * ENC_ISO_8859_7_TO_LOWER_CASE into lower[0], advances *pp by one and
 * always returns 1.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);

  lower[0] = ENC_ISO_8859_7_TO_LOWER_CASE(ch);
  *pp += 1;
  return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
@ -204,20 +215,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for ISO-8859-7.  Pointer-based callbacks are listed
 * next to their iterator-based `_se` counterparts (for example
 * mbc_case_fold / mbc_case_fold_se).
 */
OnigEncodingType OnigEncodingISO_8859_7 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-7", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -78,20 +78,25 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
/*
 * Encoding descriptor for ISO-8859-8.  Case folding uses the shared ASCII
 * helpers (onigenc_ascii_*); only is_code_ctype is local to this file.
 * Entries ending in `_se` are the additional variants added next to the
 * pointer-based callbacks.
 */
OnigEncodingType OnigEncodingISO_8859_8 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-8", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_mbc_case_fold_se,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -121,6 +121,24 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1;
}
/*
 * Iterator-based case fold for ISO-8859-9.
 *
 * Reads one byte at *pp, stores its folded form in `lower`, moves *pp one
 * position forward and returns the number of bytes stored (2 for byte
 * 0xdf expanded to "ss" under multi-char folding, else 1).
 * `it` is not named in the body; presumably ONIG_CHARAT reads through it
 * (TODO confirm against the macro definition).
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                 OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);
  int written;

  if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0 && ch == 0xdf) {
    lower[0] = 's';
    lower[1] = 's';
    written = 2;
  }
  else {
    lower[0] = ENC_ISO_8859_9_TO_LOWER_CASE(ch);
    written = 1;
  }

  /* one input byte is consumed in either case */
  *pp += 1;
  return written;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -210,20 +228,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for ISO-8859-9.  Pointer-based callbacks are listed
 * next to their iterator-based `_se` counterparts (for example
 * mbc_case_fold / mbc_case_fold_se).
 */
OnigEncodingType OnigEncodingISO_8859_9 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"ISO-8859-9", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -115,6 +115,17 @@ koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
/*
 * Iterator-based case fold for KOI8: maps the byte at *pp through
 * ENC_KOI8_TO_LOWER_CASE into lower[0], advances *pp by one and always
 * returns 1.
 */
static int
koi8_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
                      OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);

  lower[0] = ENC_KOI8_TO_LOWER_CASE(ch);
  *pp += 1;
  return 1;
}
#if 0
static int
koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
@ -232,20 +243,25 @@ koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for KOI8.  Pointer-based callbacks are listed next to
 * their iterator-based `_se` counterparts (for example koi8_mbc_case_fold /
 * koi8_mbc_case_fold_se).
 */
OnigEncodingType OnigEncodingKOI8 = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"KOI8", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
koi8_mbc_case_fold,
koi8_mbc_case_fold_se,
koi8_apply_all_case_fold,
koi8_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
koi8_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -114,6 +114,17 @@ koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
/*
 * Iterator-based case fold for KOI8-R: maps the byte at *pp through
 * ENC_KOI8_R_TO_LOWER_CASE into lower[0], advances *pp by one and always
 * returns 1.
 */
static int
koi8_r_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
                        OnigPosition* pp, OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*pp);

  lower[0] = ENC_KOI8_R_TO_LOWER_CASE(ch);
  *pp += 1;
  return 1;
}
#if 0
static int
koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -194,20 +205,25 @@ koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for KOI8-R.  Pointer-based callbacks are listed next
 * to their iterator-based `_se` counterparts (for example
 * koi8_r_mbc_case_fold / koi8_r_mbc_case_fold_se).
 */
OnigEncodingType OnigEncodingKOI8_R = {
onigenc_single_byte_mbc_enc_len,
onigenc_single_byte_mbc_enc_len_se,
"KOI8-R", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_mbc_to_code_se,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
koi8_r_mbc_case_fold,
koi8_r_mbc_case_fold_se,
koi8_r_apply_all_case_fold,
koi8_r_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
koi8_r_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_left_adjust_char_head_se,
onigenc_always_true_is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -174,6 +174,12 @@ mbc_enc_len(const UChar* p)
return EncLen_SJIS[*p];
}
/*
 * Iterator-based character length: looks up EncLen_SJIS using the byte at
 * position `p` and returns that table entry.
 */
static int
mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
  const int width = EncLen_SJIS[ONIG_CHARAT(p)];

  return width;
}
static int
code_to_mbclen(OnigCodePoint code)
{
@ -212,6 +218,25 @@ mbc_to_code(const UChar* p, const UChar* end)
return n;
}
/*
 * Iterator-based decode of the character starting at `p`.
 *
 * The expected byte count comes from mbc_enc_len_se(); bytes are combined
 * big-endian (each following byte shifts the accumulator left by 8).  If
 * `end` is reached before all expected bytes are read, the value built so
 * far is returned.
 */
static OnigCodePoint
mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
int c, i, len;
OnigCodePoint n;
len = mbc_enc_len_se(it, p);
/* NOTE(review): p++ is passed as a macro argument; this relies on
 * ONIG_CHARAT evaluating its argument exactly once -- confirm. */
c = ONIG_CHARAT(p++);
n = c;
if (len == 1) return n;
for (i = 1; i < len; i++) {
/* truncated character: stop instead of reading at or beyond `end` */
if (p >= end) break;
c = ONIG_CHARAT(p++);
n <<= 8; n += c;
}
return n;
}
static int
code_to_mbc(OnigCodePoint code, UChar *buf)
{
@ -309,7 +334,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
const UChar** pp, const UChar* end, UChar* lower)
{
const UChar* p = *pp;
@ -329,6 +354,28 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
}
}
/*
 * Iterator-based case fold for Shift_JIS / CP932.
 *
 * ASCII bytes are lower-cased directly (one byte in, one byte out).  Any
 * other character is decoded with mbc_to_code_se(), mapped through
 * get_lower_case() and re-encoded into `lower` with code_to_mbc(); *pp is
 * then advanced by the length of the re-encoded result.  Returns the number
 * of bytes written to `lower`.
 */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED,
                 OnigPosition* pp, OnigPosition end, UChar* lower)
{
  const UChar lead = ONIG_CHARAT(*pp);
  OnigCodePoint folded;
  int nbytes;

  if (!ONIGENC_IS_MBC_ASCII_SE(lead)) {
    folded = get_lower_case(mbc_to_code_se(it, *pp, end));
    nbytes = code_to_mbc(folded, lower);
    *pp += nbytes;
    return nbytes; /* return byte length of converted char to lower */
  }

  lower[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(lead);
  *pp += 1;
  return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
@ -377,6 +424,29 @@ left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )(p + ((s - p) & ~1));
}
/*
 * Iterator-based counterpart of left_adjust_char_head(): given a position
 * `s` inside [start, ...), returns the position of the head of the
 * character that contains `s`.  Returns `s` unchanged when s <= start.
 */
static OnigPosition
left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
OnigPosition p;
int len;
if (s <= start) return s;
p = s;
/* If the byte at s could be a trail byte, walk backwards over the run of
 * possible lead bytes; stop one past the first byte that is not a lead. */
if (SJIS_ISMB_TRAIL(ONIG_CHARAT(p))) {
while (p > start) {
/* NOTE(review): --p is evaluated inside nested macros; this relies on each
 * macro evaluating its argument exactly once -- confirm. */
if (! SJIS_ISMB_FIRST(ONIG_CHARAT(--p))) {
p++;
break;
}
}
}
len = mbc_enc_len_se(it, p);
/* the character starting at p already covers s */
if (p + len > s) return p;
p += len;
/* step forward from p in units of two bytes without passing s */
return (p + ((s - p) & ~1));
}
static int
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
@ -531,40 +601,50 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
#ifdef ENC_CP932
/*
 * Encoding descriptor used when ENC_CP932 is defined.  Every local callback
 * that has an iterator-based `_se` variant lists it directly after the
 * pointer-based one.
 */
OnigEncodingType OnigEncodingCP932 = {
mbc_enc_len,
mbc_enc_len_se,
"CP932", /* name */
2, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
mbc_to_code,
mbc_to_code_se,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
left_adjust_char_head_se,
is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};
#else
/*
 * Encoding descriptor used when ENC_CP932 is not defined.  Every local
 * callback that has an iterator-based `_se` variant lists it directly after
 * the pointer-based one.
 */
OnigEncodingType OnigEncodingSJIS = {
mbc_enc_len,
mbc_enc_len_se,
"Shift_JIS", /* name */
2, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
onigenc_is_mbc_newline_0x0a_se,
mbc_to_code,
mbc_to_code_se,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
mbc_case_fold_se,
apply_all_case_fold,
get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
left_adjust_char_head_se,
is_allowed_reverse_match,
ONIGENC_FLAG_NONE,
};

View File

@ -108,6 +108,7 @@ typedef struct {
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
#define CODE_RANGES_NUM numberof(CodeRanges)
#define CODE_SCRIPTS_NUM numberof(CodeScripts)
extern int
onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
@ -148,6 +149,17 @@ onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
return onigenc_unicode_ctype_code_range(ctype, ranges);
}
/*
 * Returns the first entry of CodeScripts whose code range table contains
 * `code`, or 0 when no entry matches.  Entries are tested in array order.
 */
extern const OnigCodePoint*
onigenc_unicode_code_script(OnigCodePoint code)
{
  int idx;

  for (idx = 0; idx < CODE_SCRIPTS_NUM; idx++) {
    const OnigCodePoint* ranges = CodeScripts[idx];

    if (onig_is_in_code_range((UChar*) ranges, code)) {
      return ranges;
    }
  }

  return 0;
}
#include "st.h"
#define PROPERTY_NAME_MAX_SIZE (MAX_WORD_LENGTH + 1)
@ -220,12 +232,19 @@ static struct st_hash_type type_code3_hash = {
code3_hash,
};
static st_table* FoldTable; /* fold-1, fold-2, fold-3 */
static st_table* Unfold1Table;
static st_table* Unfold2Table;
static st_table* Unfold3Table;
static int CaseFoldInited = 0;
/*
 * Case-fold lookup tables and their init flag.  With
 * USE_SHARED_UNICODE_TABLE the objects are defined without `static`;
 * otherwise they are private to this translation unit.
 */
#ifdef USE_SHARED_UNICODE_TABLE
st_table* FoldTable; /* fold-1, fold-2, fold-3 */
st_table* Unfold1Table;
st_table* Unfold2Table;
st_table* Unfold3Table;
int CaseFoldInited = 0;
#else
static st_table* FoldTable; /* fold-1, fold-2, fold-3 */
static st_table* Unfold1Table;
static st_table* Unfold2Table;
static st_table* Unfold3Table;
static int CaseFoldInited = 0;
#endif /* USE_SHARED_UNICODE_TABLE */
static int init_case_fold_table(void)
{
@ -338,6 +357,59 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
return len;
}
/*
 * Iterator-based Unicode case fold.
 *
 * Decodes one character of encoding `enc` at *pp, advances *pp past it and
 * writes the folded form, encoded in `enc`, to `fold`.  Returns the number
 * of bytes written.  Characters without a FoldTable entry are copied
 * through unchanged.
 */
extern int
onigenc_unicode_mbc_case_fold_se(OnigIterator* it, OnigEncoding enc,
OnigCaseFoldType flag ARG_UNUSED, OnigPosition* pp, OnigPosition end,
UChar* fold)
{
CodePointList3 *to;
OnigCodePoint code;
int i, len, rlen;
OnigPosition p = *pp;
/* lazily build the fold tables on first use */
if (CaseFoldInited == 0) init_case_fold_table();
code = ONIGENC_MBC_TO_CODE_SE(it, enc, p, end);
len = enclen_se(it, enc, p);
/* the input is consumed up front; `p` still marks the character start */
*pp += len;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
/* Turkic special cases: U+0049 -> U+0131 and U+0130 -> U+0069 */
if (code == 0x0049) {
return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
}
else if (code == 0x0130) {
return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
}
}
#endif
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
if (to->n == 1) {
return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold);
}
#if 0
/* NO NEEDS TO CHECK */
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
#else
else {
#endif
/* multi-code-point fold: encode each target code point back to back */
rlen = 0;
for (i = 0; i < to->n; i++) {
len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold);
fold += len;
rlen += len;
}
return rlen;
}
}
/* no fold entry: copy the original bytes of the character */
for (i = 0; i < len; i++) {
/* NOTE(review): p++ is passed as a macro argument; this relies on
 * ONIG_CHARAT evaluating its argument exactly once -- confirm. */
*fold++ = ONIG_CHARAT(p++);
}
return len;
}
extern int
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)

View File

@ -25940,6 +25940,112 @@ static const OnigCodePoint* const CodeRanges[] = {
CR_In_No_Block,
#endif /* USE_UNICODE_PROPERTIES */
};
/*
 * Table of per-script code range tables (the CR_* arrays defined earlier in
 * this file), one entry per script.
 * NOTE(review): presumably scanned in order by
 * onigenc_unicode_code_script(), which would make the order significant if
 * ranges overlapped -- confirm against that function.
 */
static const OnigCodePoint* const CodeScripts[] = {
CR_Common,
CR_Latin,
CR_Greek,
CR_Cyrillic,
CR_Armenian,
CR_Hebrew,
CR_Arabic,
CR_Syriac,
CR_Thaana,
CR_Devanagari,
CR_Bengali,
CR_Gurmukhi,
CR_Gujarati,
CR_Oriya,
CR_Tamil,
CR_Telugu,
CR_Kannada,
CR_Malayalam,
CR_Sinhala,
CR_Thai,
CR_Lao,
CR_Tibetan,
CR_Myanmar,
CR_Georgian,
CR_Hangul,
CR_Ethiopic,
CR_Cherokee,
CR_Canadian_Aboriginal,
CR_Ogham,
CR_Runic,
CR_Khmer,
CR_Mongolian,
CR_Hiragana,
CR_Katakana,
CR_Bopomofo,
CR_Han,
CR_Yi,
CR_Old_Italic,
CR_Gothic,
CR_Deseret,
CR_Inherited,
CR_Tagalog,
CR_Hanunoo,
CR_Buhid,
CR_Tagbanwa,
CR_Limbu,
CR_Tai_Le,
CR_Linear_B,
CR_Ugaritic,
CR_Shavian,
CR_Osmanya,
CR_Cypriot,
CR_Braille,
CR_Buginese,
CR_Coptic,
CR_New_Tai_Lue,
CR_Glagolitic,
CR_Tifinagh,
CR_Syloti_Nagri,
CR_Old_Persian,
CR_Kharoshthi,
CR_Balinese,
CR_Cuneiform,
CR_Phoenician,
CR_Phags_Pa,
CR_Nko,
CR_Sundanese,
CR_Lepcha,
CR_Ol_Chiki,
CR_Vai,
CR_Saurashtra,
CR_Kayah_Li,
CR_Rejang,
CR_Lycian,
CR_Carian,
CR_Lydian,
CR_Cham,
CR_Tai_Tham,
CR_Tai_Viet,
CR_Avestan,
CR_Egyptian_Hieroglyphs,
CR_Samaritan,
CR_Lisu,
CR_Bamum,
CR_Javanese,
CR_Meetei_Mayek,
CR_Imperial_Aramaic,
CR_Old_South_Arabian,
CR_Inscriptional_Parthian,
CR_Inscriptional_Pahlavi,
CR_Old_Turkic,
CR_Kaithi,
CR_Batak,
CR_Brahmi,
CR_Mandaic,
CR_Chakma,
CR_Meroitic_Cursive,
CR_Meroitic_Hieroglyphs,
CR_Miao,
CR_Sharada,
CR_Sora_Sompeng,
CR_Takri
};
struct uniname2ctype_struct {
int name, ctype;
};

View File

@ -54,6 +54,12 @@ utf16be_mbc_enc_len(const UChar* p)
return EncLen_UTF16[*p];
}
/*
 * Iterator-based character length for UTF-16BE: returns the EncLen_UTF16
 * entry selected by the first byte at `p`.
 */
static int
utf16be_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
  const int width = EncLen_UTF16[ONIG_CHARAT(p)];

  return width;
}
static int
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
{
@ -71,6 +77,26 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end)
return 0;
}
/*
 * Iterator-based newline test for UTF-16BE.  Returns 1 when the two bytes
 * at `p` encode U+000A, or (with USE_UNICODE_ALL_LINE_TERMINATORS) U+000B,
 * U+000C, U+000D, U+0085, U+2028 or U+2029; returns 0 otherwise, including
 * when fewer than two bytes remain before `end`.
 */
static int
utf16be_is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
  UChar hi, lo;

  if (p + 1 >= end) return 0;

  hi = ONIG_CHARAT(p);
  lo = ONIG_CHARAT(p+1);

  if (hi == 0x00 && lo == 0x0a)
    return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  if (hi == 0x00 && (lo == 0x0b || lo == 0x0c || lo == 0x0d || lo == 0x85))
    return 1;
  if (hi == 0x20 && (lo == 0x28 || lo == 0x29))
    return 1;
#endif
  return 0;
}
static OnigCodePoint
utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
@ -87,6 +113,24 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
return code;
}
/*
 * Iterator-based decode of one UTF-16BE character at `p`.  A unit whose
 * first byte satisfies UTF16_IS_SURROGATE_FIRST is combined with the two
 * bytes that follow it; any other unit is returned as its 16-bit value.
 * `end` is not consulted, so the caller must guarantee the bytes exist.
 */
static OnigCodePoint
utf16be_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
{
  const UChar b0 = ONIG_CHARAT(p);
  const UChar b1 = ONIG_CHARAT(p+1);
  OnigCodePoint code;

  if (!UTF16_IS_SURROGATE_FIRST(b0)) {
    code = b0 * 256 + b1;
    return code;
  }

  /* surrogate pair: bytes p+2 and p+3 are only read on this path */
  code = ((((b0 - 0xd8) << 2) + ((b1 & 0xc0) >> 6) + 1) << 16)
       + ((((b1 & 0x3f) << 2) + (ONIG_CHARAT(p+2) - 0xdc)) << 8)
       + ONIG_CHARAT(p+3);
  return code;
}
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
@ -145,6 +189,35 @@ utf16be_mbc_case_fold(OnigCaseFoldType flag,
pp, end, fold);
}
/*
 * Iterator-based case fold for UTF-16BE.
 *
 * ASCII code units (high byte 0, low byte ASCII) are folded inline: two
 * bytes are written to `fold`, *pp advances by 2 and 2 is returned.  With
 * the Turkish/Azeri flag, 'I' (0x49) folds to the bytes 0x01 0x31.
 * Everything else is delegated to onigenc_unicode_mbc_case_fold_se().
 */
static int
utf16be_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                         OnigPosition* pp, OnigPosition end, UChar* fold)
{
  const UChar hi = ONIG_CHARAT(*pp);
  const UChar lo = ONIG_CHARAT(*pp+1);

  if (!(ONIGENC_IS_ASCII_CODE(lo) && hi == 0)) {
    return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF16_BE, flag,
                                            pp, end, fold);
  }

#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && lo == 0x49) {
    fold[0] = 0x01;
    fold[1] = 0x31;
  }
  else
#endif
  {
    fold[0] = 0;
    fold[1] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(lo);
  }

  *pp += 2;
  return 2;
}
#if 0
static int
utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -194,6 +267,21 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )s;
}
/*
 * Iterator-based head adjustment for UTF-16BE: aligns `s` down to a 2-byte
 * boundary relative to `start`, then steps back one more unit when the
 * byte at the aligned position satisfies UTF16_IS_SURROGATE_SECOND and
 * there is room to do so.  Returns `s` unchanged when s <= start.
 */
static OnigPosition
utf16be_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  if (s <= start) return s;

  /* drop the odd byte, if any, so s sits on a code unit boundary */
  s -= (s - start) % 2;

  if (UTF16_IS_SURROGATE_SECOND(ONIG_CHARAT(s)) && s > start + 1)
    return s - 2;

  return s;
}
static int
utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
@ -204,20 +292,25 @@ utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for UTF-16BE.  Each utf16be_* callback that has an
 * iterator-based `_se` variant lists it directly after the pointer-based
 * one.
 */
OnigEncodingType OnigEncodingUTF16_BE = {
utf16be_mbc_enc_len,
utf16be_mbc_enc_len_se,
"UTF-16BE", /* name */
4, /* max byte length */
2, /* min byte length */
utf16be_is_mbc_newline,
utf16be_is_mbc_newline_se,
utf16be_mbc_to_code,
utf16be_mbc_to_code_se,
utf16be_code_to_mbclen,
utf16be_code_to_mbc,
utf16be_mbc_case_fold,
utf16be_mbc_case_fold_se,
onigenc_unicode_apply_all_case_fold,
utf16be_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf16be_left_adjust_char_head,
utf16be_left_adjust_char_head_se,
onigenc_always_false_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

View File

@ -60,6 +60,12 @@ utf16le_mbc_enc_len(const UChar* p)
return EncLen_UTF16[*(p+1)];
}
/*
 * Iterator-based character length for UTF-16LE: returns the EncLen_UTF16
 * entry selected by the second byte of the unit at `p` (position p+1).
 */
static int
utf16le_mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
  const int width = EncLen_UTF16[ONIG_CHARAT(p+1)];

  return width;
}
static int
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
{
@ -77,6 +83,26 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end)
return 0;
}
/*
 * Iterator-based newline test for UTF-16LE.  Returns 1 when the two bytes
 * at `p` encode U+000A, or (with USE_UNICODE_ALL_LINE_TERMINATORS) U+000B,
 * U+000C, U+000D, U+0085, U+2028 or U+2029; returns 0 otherwise, including
 * when fewer than two bytes remain before `end`.
 */
static int
utf16le_is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
  UChar lo, hi;

  if (p + 1 >= end) return 0;

  lo = ONIG_CHARAT(p);
  hi = ONIG_CHARAT(p+1);

  if (hi == 0x00 && lo == 0x0a)
    return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  if (hi == 0x00 && (lo == 0x0b || lo == 0x0c || lo == 0x0d || lo == 0x85))
    return 1;
  if (hi == 0x20 && (lo == 0x28 || lo == 0x29))
    return 1;
#endif
  return 0;
}
static OnigCodePoint
utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
@ -95,6 +121,24 @@ utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
return code;
}
/*
 * Iterator-based decode of one UTF-16LE character at `p`.  A unit whose
 * second byte satisfies UTF16_IS_SURROGATE_FIRST is combined with the two
 * bytes that follow it; any other unit is returned as its 16-bit value.
 * `end` is not consulted, so the caller must guarantee the bytes exist.
 */
static OnigCodePoint
utf16le_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
{
  const UChar lo = ONIG_CHARAT(p);
  const UChar hi = ONIG_CHARAT(p+1);
  OnigCodePoint code;

  if (!UTF16_IS_SURROGATE_FIRST(hi)) {
    code = hi * 256 + lo;
    return code;
  }

  /* surrogate pair: bytes p+2 and p+3 are only read on this path */
  code = ((((hi - 0xd8) << 2) + ((lo & 0xc0) >> 6) + 1) << 16)
       + ((((lo & 0x3f) << 2) + (ONIG_CHARAT(p+3) - 0xdc)) << 8)
       + ONIG_CHARAT(p+2);
  return code;
}
static int
utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
@ -147,6 +191,34 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag,
fold);
}
/*
 * Iterator-based case fold for UTF-16LE.
 *
 * ASCII code units (low byte ASCII, high byte 0) are folded inline: two
 * bytes are written to `fold`, *pp advances by 2 and 2 is returned.  With
 * the Turkish/Azeri flag, 'I' (0x49) folds to the bytes 0x31 0x01.
 * Everything else is delegated to onigenc_unicode_mbc_case_fold_se().
 */
static int
utf16le_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                         OnigPosition* pp, OnigPosition end, UChar* fold)
{
  const UChar lo = ONIG_CHARAT(*pp);

  if (!ONIGENC_IS_ASCII_CODE(lo) || ONIG_CHARAT(*pp+1) != 0) {
    return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF16_LE, flag, pp, end,
                                            fold);
  }

#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && lo == 0x49) {
    fold[0] = 0x31;
    fold[1] = 0x01;
  }
  else
#endif
  {
    fold[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(lo);
    fold[1] = 0;
  }

  *pp += 2;
  return 2;
}
#if 0
static int
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
@ -195,6 +267,21 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )s;
}
/*
 * Iterator-based head adjustment for UTF-16LE: aligns `s` down to a 2-byte
 * boundary relative to `start`, then steps back one more unit when the
 * second byte of the aligned unit satisfies UTF16_IS_SURROGATE_SECOND and
 * there is room to do so.  Returns `s` unchanged when s <= start.
 */
static OnigPosition
utf16le_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  if (s <= start) return s;

  /* drop the odd byte, if any, so s sits on a code unit boundary */
  s -= (s - start) % 2;

  if (UTF16_IS_SURROGATE_SECOND(ONIG_CHARAT(s+1)) && s > start + 1)
    return s - 2;

  return s;
}
static int
utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
@ -205,20 +292,25 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for UTF-16LE.  Each utf16le_* callback that has an
 * iterator-based `_se` variant lists it directly after the pointer-based
 * one.
 */
OnigEncodingType OnigEncodingUTF16_LE = {
utf16le_mbc_enc_len,
utf16le_mbc_enc_len_se,
"UTF-16LE", /* name */
4, /* max byte length */
2, /* min byte length */
utf16le_is_mbc_newline,
utf16le_is_mbc_newline_se,
utf16le_mbc_to_code,
utf16le_mbc_to_code_se,
utf16le_code_to_mbclen,
utf16le_code_to_mbc,
utf16le_mbc_case_fold,
utf16le_mbc_case_fold_se,
onigenc_unicode_apply_all_case_fold,
utf16le_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf16le_left_adjust_char_head,
utf16le_left_adjust_char_head_se,
onigenc_always_false_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

View File

@ -35,6 +35,12 @@ utf32be_mbc_enc_len(const UChar* p ARG_UNUSED)
return 4;
}
/*
 * Iterator-based character length for UTF-32BE: always 4, independent of
 * the iterator and position (both parameters are unused).
 */
static int
utf32be_mbc_enc_len_se(OnigIterator* it ARG_UNUSED, OnigPosition p ARG_UNUSED)
{
return 4;
}
static int
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
{
@ -53,12 +59,39 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)
return 0;
}
/*
 * Iterator-based newline test for UTF-32BE.  Returns 1 when the four bytes
 * at `p` encode U+000A, or (with USE_UNICODE_ALL_LINE_TERMINATORS) U+000B,
 * U+000C, U+000D, U+0085, U+2028 or U+2029; returns 0 otherwise, including
 * when fewer than four bytes remain before `end`.
 */
static int
utf32be_is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
  UChar b2, b3;

  if (p + 3 >= end) return 0;

  /* every accepted code point has its two most significant bytes zero */
  if (ONIG_CHARAT(p) != 0 || ONIG_CHARAT(p+1) != 0) return 0;

  b2 = ONIG_CHARAT(p+2);
  b3 = ONIG_CHARAT(p+3);

  if (b2 == 0 && b3 == 0x0a)
    return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  if (b2 == 0 && (b3 == 0x0b || b3 == 0x0c || b3 == 0x0d || b3 == 0x85))
    return 1;
  if (b2 == 0x20 && (b3 == 0x28 || b3 == 0x29))
    return 1;
#endif
  return 0;
}
static OnigCodePoint
utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
}
/*
 * Iterator-based decode of one UTF-32BE character: combines the four bytes
 * at p..p+3, most significant first, into a code point.  `end` is not
 * consulted, so the caller must guarantee four readable bytes.
 *
 * The accumulation is done in OnigCodePoint rather than int so that a
 * malformed sequence whose first byte is >= 0x80 cannot overflow a signed
 * int (undefined behavior).
 * NOTE(review): assumes ONIG_CHARAT yields a byte value in 0..255 -- confirm.
 */
static OnigCodePoint
utf32be_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
{
  OnigCodePoint code = (OnigCodePoint )ONIG_CHARAT(p);

  code = code * 256 + (OnigCodePoint )ONIG_CHARAT(p+1);
  code = code * 256 + (OnigCodePoint )ONIG_CHARAT(p+2);
  code = code * 256 + (OnigCodePoint )ONIG_CHARAT(p+3);
  return code;
}
static int
utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
{
@ -108,6 +141,38 @@ utf32be_mbc_case_fold(OnigCaseFoldType flag,
fold);
}
/*
 * Iterator-based case fold for UTF-32BE.
 *
 * ASCII code points (three zero bytes followed by an ASCII byte) are folded
 * inline: four bytes are written to `fold`, *pp advances by 4 and 4 is
 * returned.  With the Turkish/Azeri flag, 'I' (0x49) folds to
 * 00 00 01 31.  Everything else is delegated to
 * onigenc_unicode_mbc_case_fold_se().
 */
static int
utf32be_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                         OnigPosition* pp, OnigPosition end, UChar* fold)
{
  OnigPosition p = *pp;
  const UChar low = ONIG_CHARAT(p+3);

  if (!ONIGENC_IS_ASCII_CODE(low) || ONIG_CHARAT(p+2) != 0 ||
      ONIG_CHARAT(p+1) != 0 || ONIG_CHARAT(p) != 0) {
    return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF32_BE, flag, pp, end,
                                            fold);
  }

  fold[0] = 0;
  fold[1] = 0;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && low == 0x49) {
    fold[2] = 0x01;
    fold[3] = 0x31;
  }
  else
#endif
  {
    fold[2] = 0;
    fold[3] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(low);
  }

  *pp += 4;
  return 4;
}
#if 0
static int
utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -152,6 +217,17 @@ utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )(s - rem);
}
/*
 * Iterator-based head adjustment for UTF-32BE: rounds `s` down to the
 * nearest multiple of four bytes from `start`.  Returns `s` unchanged when
 * s <= start.  No bytes are read.
 */
static OnigPosition
utf32be_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  if (s > start) {
    const int misalign = (int )((s - start) % 4);

    s = s - misalign;
  }
  return s;
}
static int
utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
@ -162,20 +238,25 @@ utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for UTF-32BE.  Each utf32be_* callback that has an
 * iterator-based `_se` variant lists it directly after the pointer-based
 * one.
 */
OnigEncodingType OnigEncodingUTF32_BE = {
utf32be_mbc_enc_len,
utf32be_mbc_enc_len_se,
"UTF-32BE", /* name */
4, /* max byte length */
4, /* min byte length */
utf32be_is_mbc_newline,
utf32be_is_mbc_newline_se,
utf32be_mbc_to_code,
utf32be_mbc_to_code_se,
utf32be_code_to_mbclen,
utf32be_code_to_mbc,
utf32be_mbc_case_fold,
utf32be_mbc_case_fold_se,
onigenc_unicode_apply_all_case_fold,
utf32be_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf32be_left_adjust_char_head,
utf32be_left_adjust_char_head_se,
onigenc_always_false_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

View File

@ -35,6 +35,12 @@ utf32le_mbc_enc_len(const UChar* p ARG_UNUSED)
return 4;
}
/*
 * Iterator-based character length for UTF-32LE: always 4, independent of
 * the iterator and position (both parameters are unused).
 */
static int
utf32le_mbc_enc_len_se(OnigIterator* it ARG_UNUSED, OnigPosition p ARG_UNUSED)
{
return 4;
}
static int
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
{
@ -43,7 +49,7 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
if ((*p == 0x0b ||*p == 0x0c ||*p == 0x0d || *p == 0x85)
&& *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00)
&& *(p+1) == 0x00 && *(p+2) == 0x00 && *(p+3) == 0x00)
return 1;
if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
&& *(p+2) == 0x00 && *(p+3) == 0x00)
@ -53,12 +59,39 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
return 0;
}
/*
 * Iterator-based newline test for UTF-32LE.  Returns 1 when the four bytes
 * at `p` encode U+000A, or (with USE_UNICODE_ALL_LINE_TERMINATORS) U+000B,
 * U+000C, U+000D, U+0085, U+2028 or U+2029; returns 0 otherwise, including
 * when fewer than four bytes remain before `end`.
 */
static int
utf32le_is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
  UChar b0, b1;

  if (p + 3 >= end) return 0;

  b0 = ONIG_CHARAT(p);
  b1 = ONIG_CHARAT(p+1);

  /* every accepted code point has its two most significant bytes zero */
  if (ONIG_CHARAT(p+2) != 0 || ONIG_CHARAT(p+3) != 0) return 0;

  if (b1 == 0 && b0 == 0x0a)
    return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  if (b1 == 0x00 && (b0 == 0x0b || b0 == 0x0c || b0 == 0x0d || b0 == 0x85))
    return 1;
  if (b1 == 0x20 && (b0 == 0x28 || b0 == 0x29))
    return 1;
#endif
  return 0;
}
static OnigCodePoint
utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
}
/*
 * Iterator-based decode of one UTF-32LE character: combines the four bytes
 * at p..p+3, least significant first, into a code point.  `end` is not
 * consulted, so the caller must guarantee four readable bytes.
 *
 * The accumulation is done in OnigCodePoint rather than int so that a
 * malformed sequence whose last byte is >= 0x80 cannot overflow a signed
 * int (undefined behavior).
 * NOTE(review): assumes ONIG_CHARAT yields a byte value in 0..255 -- confirm.
 */
static OnigCodePoint
utf32le_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
{
  OnigCodePoint code = (OnigCodePoint )ONIG_CHARAT(p+3);

  code = code * 256 + (OnigCodePoint )ONIG_CHARAT(p+2);
  code = code * 256 + (OnigCodePoint )ONIG_CHARAT(p+1);
  code = code * 256 + (OnigCodePoint )ONIG_CHARAT(p);
  return code;
}
static int
utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
{
@ -109,6 +142,39 @@ utf32le_mbc_case_fold(OnigCaseFoldType flag,
fold);
}
/*
 * Iterator-based case fold for UTF-32LE.
 *
 * ASCII code points (an ASCII byte followed by three zero bytes) are folded
 * inline: four bytes are written to `fold`, *pp advances by 4 and 4 is
 * returned.  With the Turkish/Azeri flag, 'I' (0x49) folds to
 * 31 01 00 00; every other ASCII character is lower-cased normally.
 * Everything else is delegated to onigenc_unicode_mbc_case_fold_se().
 *
 * Fix: the Turkish/Azeri branch used to emit nothing for the two low bytes
 * when the flag was set and the character was not 'I', producing a wrong
 * fold and leaving fold[2..3] unwritten.  The special case is now taken
 * only for 'I', matching the big-endian variant.
 */
static int
utf32le_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag,
                         OnigPosition* pp, OnigPosition end, UChar* fold)
{
  const OnigPosition p = *pp;
  const UChar c = ONIG_CHARAT(p);

  if (ONIGENC_IS_ASCII_CODE(c) && ONIG_CHARAT(p+1) == 0 && ONIG_CHARAT(p+2) == 0 && ONIG_CHARAT(p+3) == 0) {
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && c == 0x49) {
      *fold++ = 0x31;
      *fold++ = 0x01;
    }
    else {
#endif
      *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
      *fold++ = 0;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
    }
#endif
    /* the two most significant bytes are zero for every ASCII-range fold */
    *fold++ = 0;
    *fold = 0;
    *pp += 4;
    return 4;
  }
  else
    return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF32_LE, flag, pp, end,
                                            fold);
}
#if 0
static int
utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -152,6 +218,17 @@ utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )(s - rem);
}
/*
 * Iterator-based head adjustment for UTF-32LE: rounds `s` down to the
 * nearest multiple of four bytes from `start`.  Returns `s` unchanged when
 * s <= start.  No bytes are read.
 */
static OnigPosition
utf32le_left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  if (s > start) {
    const int misalign = (int )((s - start) % 4);

    s = s - misalign;
  }
  return s;
}
static int
utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
@ -162,20 +239,25 @@ utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
/*
 * Encoding descriptor for UTF-32LE.  Each utf32le_* callback that has an
 * iterator-based `_se` variant lists it directly after the pointer-based
 * one.
 */
OnigEncodingType OnigEncodingUTF32_LE = {
utf32le_mbc_enc_len,
utf32le_mbc_enc_len_se,
"UTF-32LE", /* name */
4, /* max byte length */
4, /* min byte length */
utf32le_is_mbc_newline,
utf32le_is_mbc_newline_se,
utf32le_mbc_to_code,
utf32le_mbc_to_code_se,
utf32le_code_to_mbclen,
utf32le_code_to_mbc,
utf32le_mbc_case_fold,
utf32le_mbc_case_fold_se,
onigenc_unicode_apply_all_case_fold,
utf32le_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf32le_left_adjust_char_head,
utf32le_left_adjust_char_head_se,
onigenc_always_false_is_allowed_reverse_match,
ONIGENC_FLAG_UNICODE,
};

View File

@ -65,6 +65,12 @@ mbc_enc_len(const UChar* p)
return EncLen_UTF8[*p];
}
/*
 * Iterator-based character length for UTF-8: returns the EncLen_UTF8 entry
 * selected by the byte at `p`.
 */
static int
mbc_enc_len_se(OnigIterator* it, OnigPosition p)
{
  const int width = EncLen_UTF8[ONIG_CHARAT(p)];

  return width;
}
static int
is_mbc_newline(const UChar* p, const UChar* end)
{
@ -88,6 +94,32 @@ is_mbc_newline(const UChar* p, const UChar* end)
return 0;
}
/*
 * Iterator-based newline test for UTF-8.  Returns 1 when the bytes at `p`
 * encode U+000A, or (with USE_UNICODE_ALL_LINE_TERMINATORS) 0x0b, 0x0c,
 * 0x0d, U+0085 (c2 85), U+2028 (e2 80 a8) or U+2029 (e2 80 a9).  Bytes at
 * or beyond `end` are never read; returns 0 when p >= end.
 */
static int
is_mbc_newline_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
  UChar lead;

  if (p >= end) return 0;

  lead = ONIG_CHARAT(p);
  if (lead == 0x0a) return 1;

#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
  if (lead == 0x0b || lead == 0x0c || lead == 0x0d) return 1;

  if (lead == 0xc2) {
    /* U+0085 */
    return (p + 1 < end && ONIG_CHARAT(p+1) == 0x85) ? 1 : 0;
  }

  if (lead == 0xe2 && p + 2 < end && ONIG_CHARAT(p+1) == 0x80) {
    /* U+2028, U+2029 */
    const UChar third = ONIG_CHARAT(p+2);

    if (third == 0xa8 || third == 0xa9) return 1;
  }
#endif

  return 0;
}
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
@ -115,6 +147,33 @@ mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
}
}
/* Decodes the UTF-8 character at position p into a code point.
 * The sequence length comes from mbc_enc_len_se(); `end` is not consulted,
 * so the trailing bytes are read without a bounds check.  A one-byte
 * sequence yields the byte itself (or, with USE_INVALID_CODE_SCHEME, a
 * dedicated invalid-code value for 0xfe / 0xff). */
static OnigCodePoint
mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
{
  OnigCodePoint code;
  int byte, trail;

  trail = mbc_enc_len_se(it, p) - 1;   /* number of continuation bytes */
  byte = ONIG_CHARAT(p);
  p++;

  if (trail <= 0) {
#ifdef USE_INVALID_CODE_SCHEME
    if (byte > 0xfd) {
      return ((byte == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
    }
#endif
    return (OnigCodePoint )byte;
  }

  /* payload bits of the lead byte, then 6 bits per continuation byte */
  code = byte & ((1 << (6 - trail)) - 1);
  for (; trail > 0; trail--) {
    byte = ONIG_CHARAT(p);
    p++;
    code = (code << 6) | (byte & ((1 << 6) - 1));
  }
  return code;
}
static int
code_to_mbclen(OnigCodePoint code)
{
@ -217,6 +276,34 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
}
}
/* Case-folds the UTF-8 character at *pp into `fold`, advances *pp past it
 * and returns the number of bytes written.  ASCII bytes are lower-cased
 * locally (with the optional Turkish/Azeri special case for 'I'); every
 * other character is delegated to onigenc_unicode_mbc_case_fold_se(). */
static int
mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp,
                 OnigPosition end, UChar* fold)
{
  const UChar lead = ONIG_CHARAT(*pp);

  if (!ONIGENC_IS_MBC_ASCII_SE(lead)) {
    return onigenc_unicode_mbc_case_fold_se(it, ONIG_ENCODING_UTF8, flag,
                                            pp, end, fold);
  }

#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && lead == 0x49) {
    fold[0] = 0xc4;
    fold[1] = 0xb1;
    (*pp)++;
    return 2;
  }
#endif

  fold[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(lead);
  (*pp)++;
  return 1; /* return byte length of converted char to lower */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
@ -275,6 +362,18 @@ left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )p;
}
/* Moves backwards from s to the nearest UTF-8 lead byte, never going below
 * `start`.  Returns s unchanged when s <= start. */
static OnigPosition
left_adjust_char_head_se(OnigIterator* it, OnigPosition start, OnigPosition s)
{
  OnigPosition pos = s;

  if (s <= start) return s;

  for (;;) {
    if (utf8_islead(ONIG_CHARAT(pos))) break;
    if (pos <= start) break;
    pos--;
  }
  return pos;
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
@ -285,20 +384,25 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
/* Encoding descriptor for UTF-8.
 * Entries are positional and follow the member order of OnigEncodingTypeST;
 * each `_se` entry is the iterator/position-based counterpart of the
 * pointer-based entry listed just before it. */
OnigEncodingType OnigEncodingUTF8 = {
mbc_enc_len, /* mbc_enc_len */
mbc_enc_len_se, /* mbc_enc_len_se */
"UTF-8", /* name */
6, /* max byte length */
1, /* min byte length */
is_mbc_newline, /* is_mbc_newline */
is_mbc_newline_se, /* is_mbc_newline_se */
mbc_to_code, /* mbc_to_code */
mbc_to_code_se, /* mbc_to_code_se */
code_to_mbclen, /* code_to_mbclen */
code_to_mbc, /* code_to_mbc */
mbc_case_fold, /* mbc_case_fold */
mbc_case_fold_se, /* mbc_case_fold_se */
onigenc_unicode_apply_all_case_fold, /* apply_all_case_fold */
get_case_fold_codes_by_str, /* get_case_fold_codes_by_str */
onigenc_unicode_property_name_to_ctype, /* property_name_to_ctype */
onigenc_unicode_is_code_ctype, /* is_code_ctype */
get_ctype_code_range, /* get_ctype_code_range */
left_adjust_char_head, /* left_adjust_char_head */
left_adjust_char_head_se, /* left_adjust_char_head_se */
onigenc_always_true_is_allowed_reverse_match, /* is_allowed_reverse_match */
ONIGENC_FLAG_UNICODE, /* flags */
};

View File

@ -68,9 +68,9 @@ void re_free_pattern P_((struct re_pattern_buffer*));
ONIG_EXTERN
int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
ONIG_EXTERN
int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
int re_search P_((OnigIterator* it, struct re_pattern_buffer*, OnigPosition, OnigPosition, OnigPosition, OnigPosition, struct re_registers*));
ONIG_EXTERN
int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
int re_match P_((OnigIterator* it, struct re_pattern_buffer*, OnigPosition, OnigPosition, OnigPosition, struct re_registers*));
ONIG_EXTERN
void re_set_casetable P_((const char*));
ONIG_EXTERN

View File

@ -152,7 +152,7 @@ ONIG_EXTERN const char* onig_copyright P_((void));
ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
ONIG_EXTERN OnigPosition regexec P_((OnigIterator* it, regex_t* reg, OnigPosition str, size_t nmatch, regmatch_t* matches, int options));
ONIG_EXTERN void regfree P_((regex_t* reg));
ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));

View File

@ -102,11 +102,31 @@ extern "C" {
typedef unsigned char OnigUChar;
typedef unsigned int OnigCodePoint;
typedef unsigned int OnigCtype;
typedef size_t OnigDistance;
typedef ptrdiff_t OnigPosition;
typedef size_t OnigDistance;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
/* Position (byte offset) handed to the iterator callbacks; a signed integer
 * type at least 64 bits wide. */
#if defined(_MSC_VER)
typedef __int64 OnigPosition;
#else
typedef long long OnigPosition;
#endif

/* Sentinel for "no position".  The value and the macro arguments are
 * parenthesized so the macros stay correct inside larger expressions and
 * when the argument is itself an expression (e.g. `x & 1`). */
#define ONIG_BADPOS (-1)
#define ONIG_IS_BADPOS(p) ((p) == ONIG_BADPOS)
#define ONIG_IS_NOT_BADPOS(p) ((p) != ONIG_BADPOS)
/* Callback that returns the byte found at position `pos`; `ptr` is the
 * opaque pointer stored in the iterator (see ONIG_CHARAT below). */
typedef UChar (*OnigCharAtFunc)(OnigPosition pos, const void* ptr);
/* Pairs a byte-access callback with the opaque data it reads from. */
typedef struct OnigIteratorStruct {
OnigCharAtFunc at; /* byte accessor */
const void* ptr; /* passed unchanged as the second argument of `at` */
} OnigIterator;
/* Reads the byte at `pos` through the iterator.
 * Requires a variable named `it` (an OnigIterator*) in the calling scope. */
#define ONIG_CHARAT(pos) (it->at(pos, it->ptr))
/* Iterator API */
/* NOTE(review): presumably the accessor for a plain contiguous byte buffer;
 * its definition is not visible here -- confirm before relying on it. */
ONIG_EXTERN
UChar onig_default_charat P_((OnigPosition pos, const void* ptr));
typedef unsigned int OnigCaseFoldType; /* case fold flag */
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
@ -148,20 +168,25 @@ typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, i
/* Per-encoding table of callbacks and properties.
 * Every member ending in `_se` mirrors the pointer-based member declared just
 * before it, but receives an OnigIterator plus OnigPosition offsets instead
 * of raw `OnigUChar*` pointers.  Encoding descriptors initialize this struct
 * positionally, so member order must stay in sync with those initializers. */
typedef struct OnigEncodingTypeST {
int (*mbc_enc_len)(const OnigUChar* p);
int (*mbc_enc_len_se)(OnigIterator* it, OnigPosition p);
const char* name;
int max_enc_len;
int min_enc_len;
int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
int (*is_mbc_newline_se)(OnigIterator* it, OnigPosition p, OnigPosition end);
OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
OnigCodePoint (*mbc_to_code_se)(OnigIterator* it, OnigPosition p, OnigPosition end);
int (*code_to_mbclen)(OnigCodePoint code);
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
/* pp is advanced in place, like the pointer-based variant's pp */
int (*mbc_case_fold_se)(OnigIterator* it, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end, OnigUChar* to);
int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]);
int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype);
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]);
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
OnigPosition (*left_adjust_char_head_se)(OnigIterator* it, OnigPosition start, OnigPosition p);
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
unsigned int flags;
} OnigEncodingType;
@ -270,37 +295,57 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
#define ONIGENC_IS_MBC_HEAD_SE(it,enc,p) (ONIGENC_MBC_ENC_LEN_SE(it,enc,p) != 1)
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
/* True when the already-fetched byte value c is in the ASCII range.
 * The argument is parenthesized so expression arguments compare correctly. */
#define ONIGENC_IS_MBC_ASCII_SE(c) ((c) < 128)
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
#define ONIGENC_IS_MBC_WORD_SE(it,enc,s,end) \
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE_SE(it,enc,s,end))
#define ONIGENC_IS_MBC_SINGLEBYTE_SE(it,enc,s,end) \
(ONIGENC_MBC_TO_CODE_SE(it,enc,s,end) <= 0xFF)
#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
onigenc_ascii_is_code_ctype( \
ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD)
#define ONIGENC_IS_MBC_ASCII_WORD_SE(it,enc,s,end) \
onigenc_ascii_is_code_ctype( \
ONIGENC_MBC_TO_CODE_SE(it,enc,s,end),ONIGENC_CTYPE_WORD)
#define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
#define ONIGENC_SCRIPT(enc,s,end) (onigenc_unicode_code_script(ONIGENC_MBC_TO_CODE(enc,s,end)))
#define ONIGENC_SCRIPT_SE(it,enc,s,end) (onigenc_unicode_code_script(ONIGENC_MBC_TO_CODE_SE(it,enc,s,end)))
#define ONIGENC_NAME(enc) ((enc)->name)
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
(enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf)
#define ONIGENC_MBC_CASE_FOLD_SE(it,enc,flag,pp,end,buf) \
(enc)->mbc_case_fold_se(it,flag,(OnigPosition* )pp,end,buf)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it,enc,start,s) \
(enc)->left_adjust_char_head_se(it, start, s)
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
(enc)->apply_all_case_fold(case_fold_flag,f,arg)
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
(enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
#define ONIGENC_STEP_BACK_SE(it,enc,start,s,n) \
onigenc_step_back_se((it),(enc),(start),(s),(n))
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
#define ONIGENC_MBC_ENC_LEN_SE(it,enc,p) (enc)->mbc_enc_len_se((it),(p))
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
#define ONIGENC_IS_MBC_NEWLINE_SE(it,enc,p,end) (enc)->is_mbc_newline_se((it),(p),(end))
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
#define ONIGENC_MBC_TO_CODE_SE(it,enc,p,end) (enc)->mbc_to_code_se((it),(p),(end))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
@ -342,7 +387,8 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
ONIG_EXTERN
OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
ONIG_EXTERN
OnigPosition onigenc_step_back_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s, int n));
/* encoding API */
ONIG_EXTERN
@ -356,18 +402,29 @@ void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
ONIG_EXTERN
OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
ONIG_EXTERN
OnigPosition onigenc_get_right_adjust_char_head_with_prev_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s, OnigPosition* prev));
ONIG_EXTERN
OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
OnigPosition onigenc_get_prev_char_head_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s));
ONIG_EXTERN
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
OnigPosition onigenc_get_left_adjust_char_head_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s));
ONIG_EXTERN
OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
OnigPosition onigenc_get_right_adjust_char_head_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s));
ONIG_EXTERN
int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
ONIG_EXTERN
int onigenc_strlen_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition p, OnigPosition end));
ONIG_EXTERN
int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
OnigPosition onigenc_str_bytelen_null_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition p));
/* PART: regular expression */
@ -406,7 +463,9 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
/* options (newline) */
#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit */
/* options (whole word) -- occupies the next bit after ONIG_OPTION_NEWLINE_CRLF */
#define SE_ONIG_OPTION_WHOLEWORD (ONIG_OPTION_NEWLINE_CRLF << 1)
#define ONIG_OPTION_MAXBIT SE_ONIG_OPTION_WHOLEWORD /* limit */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
@ -422,6 +481,7 @@ typedef struct {
} OnigSyntaxType;
ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
ONIG_EXTERN OnigSyntaxType OnigSyntaxWildChar;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
@ -436,6 +496,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxPython;
/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
#define ONIG_SYNTAX_WILDCHAR (&OnigSyntaxWildChar)
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
@ -775,11 +836,13 @@ int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pat
ONIG_EXTERN
int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
OnigPosition onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
OnigPosition onig_search P_((OnigIterator* it, OnigRegex, OnigPosition str, OnigPosition end, OnigPosition start, OnigPosition range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigPosition onig_search_gpos P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
OnigPosition onig_search_gpos P_((OnigIterator* it, OnigRegex, OnigPosition str, OnigPosition end, OnigPosition global_pos, OnigPosition start, OnigPosition range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigPosition onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
OnigPosition onig_match P_((OnigIterator* it, OnigRegex, OnigPosition str, OnigPosition end, OnigPosition at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigPosition onig_match_gpos P_((OnigIterator* it, OnigRegex, OnigPosition str, OnigPosition end, OnigPosition global_pos, OnigPosition at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN

View File

@ -5791,9 +5791,13 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
scan_env.mem_nodes_dynamic = (Node** )NULL;
}
if (IS_WHOLEWORD(reg->options))
r = add_opcode(reg, OP_WORD_BEGIN);
r = compile_tree(root, reg);
if (r == 0) {
r = add_opcode(reg, OP_END);
/* Whole-word mode emits OP_WORD_END just before OP_END.  r is 0 on entry
 * to this branch; add OP_END only if OP_WORD_END succeeded, so its error
 * code is not masked. */
if (IS_WHOLEWORD(reg->options))
  r = add_opcode(reg, OP_WORD_END);
if (r == 0)
  r = add_opcode(reg, OP_END);
#ifdef USE_SUBEXP_CALL
if (scan_env.num_call > 0) {
r = unset_addr_list_fix(&uslist, reg);
@ -5996,6 +6000,10 @@ onig_end(void)
onig_free_node_list();
#endif
#ifdef USE_SHARED_UNICODE_TABLE
onig_free_shared_unicode_table();
#endif
onig_inited = 0;
THREAD_ATOMIC_END;

View File

@ -61,6 +61,16 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U
return p;
}
/* Returns the first character head at or after s: s itself when it already
 * is a head, otherwise the position just past the character containing s. */
extern OnigPosition
onigenc_get_right_adjust_char_head_se(OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s)
{
  const OnigPosition head = ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s);

  if (head >= s) return head;
  return head + enclen_se(it, enc, head);
}
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
const UChar* start, const UChar* s, const UChar** prev)
@ -77,6 +87,22 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
return p;
}
/* Like onigenc_get_right_adjust_char_head_se(), and additionally reports
 * through *prev (when prev is non-NULL) the head of the character that
 * contained s, or ONIG_BADPOS when s was already a character head. */
extern OnigPosition
onigenc_get_right_adjust_char_head_with_prev_se(OnigIterator* it, OnigEncoding enc,
                                                OnigPosition start, OnigPosition s, OnigPosition* prev)
{
  const OnigPosition head = ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s);

  if (head >= s) {
    /* s was not inside a character: no previous head is known */
    if (prev) *prev = ONIG_BADPOS;
    return head;
  }

  if (prev) *prev = head;
  return head + enclen_se(it, enc, head);
}
extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
@ -86,6 +112,15 @@ onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
}
/* Head of the character that ends just before s, or ONIG_BADPOS when s is
 * at (or before) start. */
extern OnigPosition
onigenc_get_prev_char_head_se(OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s)
{
  if (s > start) {
    return ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s - 1);
  }
  return ONIG_BADPOS;
}
extern UChar*
onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
{
@ -98,6 +133,18 @@ onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
return (UChar* )s;
}
/* Steps back n characters from s, never crossing `start`.
 * Returns the resulting position, or ONIG_BADPOS when fewer than n
 * characters precede s (or when s is already ONIG_BADPOS). */
extern OnigPosition
onigenc_step_back_se(OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s, int n)
{
  int step;

  for (step = 0; step < n; step++) {
    if (ONIG_IS_BADPOS(s)) break;
    if (s <= start) return ONIG_BADPOS;
    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s - 1);
  }
  return s;
}
extern UChar*
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
{
@ -121,6 +168,18 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
return n;
}
/* Counts the characters in [p, end) by hopping from one character head to
 * the next using the encoding's length callback. */
extern int
onigenc_strlen_se(OnigIterator* it, OnigEncoding enc, OnigPosition p, OnigPosition end)
{
  int count;

  for (count = 0; p < end; count++) {
    p += ONIGENC_MBC_ENC_LEN_SE(it, enc, p);
  }
  return count;
}
extern int
onigenc_strlen_null(OnigEncoding enc, const UChar* s)
{
@ -170,6 +229,30 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
}
}
/* Byte length of the NUL-terminated text starting at s.
 * The terminator is a run of ONIGENC_MBC_MINLEN(enc) zero bytes beginning
 * at a character head; the scan advances one character at a time and does
 * not stop until such a terminator is found. */
extern OnigPosition
onigenc_str_bytelen_null_se(OnigIterator* it, OnigEncoding enc, OnigPosition s)
{
  const int min_len = ONIGENC_MBC_MINLEN(enc);
  OnigPosition p;

  for (p = s; ; p += ONIGENC_MBC_ENC_LEN_SE(it, enc, p)) {
    int k;

    if (ONIG_CHARAT(p) != '\0') continue;
    if (min_len == 1) return p - s;

    /* the remaining min_len - 1 bytes must be zero as well */
    for (k = 1; k < min_len; k++) {
      if (ONIG_CHARAT(p + k) != '\0') break;
    }
    if (k == min_len) return p - s;
  }
}
const UChar OnigEncAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
@ -362,6 +445,12 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
}
/* Function form of ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE: forwards to the
 * encoding's left_adjust_char_head_se callback and returns its result. */
extern OnigPosition
onigenc_get_left_adjust_char_head_se(OnigIterator* it, OnigEncoding enc, OnigPosition start, OnigPosition s)
{
return ONIGENC_LEFT_ADJUST_CHAR_HEAD_SE(it, enc, start, s);
}
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
{ 0x41, 0x61 },
{ 0x42, 0x62 },
@ -570,6 +659,15 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
return 0;
}
/* Returns 1 when p is inside [.., end) and the byte there is 0x0a,
 * otherwise 0.  The byte is only fetched when p < end. */
extern int
onigenc_is_mbc_newline_0x0a_se(OnigIterator* it, OnigPosition p, OnigPosition end)
{
  return (p < end && ONIG_CHARAT(p) == 0x0a) ? 1 : 0;
}
/* for single byte encodings */
extern int
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
@ -581,6 +679,17 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
return 1; /* return byte length of converted char to lower */
}
/* for single byte encodings */
/* Writes the ASCII lower-case form of the byte at *p into *lower, advances
 * *p by one and returns 1 (the number of bytes written). */
extern int
onigenc_ascii_mbc_case_fold_se(OnigIterator* it, OnigCaseFoldType flag ARG_UNUSED, OnigPosition* p,
                               OnigPosition end ARG_UNUSED, UChar* lower)
{
  const UChar ch = ONIG_CHARAT(*p);

  lower[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(ch);
  *p += 1;
  return 1; /* return byte length of converted char to lower */
}
#if 0
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
@ -599,12 +708,24 @@ onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
return 1;
}
/* Single-byte encodings: every character is exactly one byte long, so both
 * arguments are ignored. */
extern int
onigenc_single_byte_mbc_enc_len_se(OnigIterator* it ARG_UNUSED, OnigPosition p ARG_UNUSED)
{
return 1;
}
extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
return (OnigCodePoint )(*p);
}
/* Single-byte encodings: the code point is the value of the byte at p,
 * read through the iterator.  `end` is not consulted. */
extern OnigCodePoint
onigenc_single_byte_mbc_to_code_se(OnigIterator* it, OnigPosition p, OnigPosition end ARG_UNUSED)
{
return (OnigCodePoint )(ONIG_CHARAT(p));
}
extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
{
@ -625,6 +746,13 @@ onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
return (UChar* )s;
}
/* Single-byte encodings: every position is already a character head, so s
 * is returned unchanged and the other arguments are ignored. */
extern OnigPosition
onigenc_single_byte_left_adjust_char_head_se(OnigIterator* it ARG_UNUSED, OnigPosition start ARG_UNUSED,
OnigPosition s)
{
return s;
}
extern int
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
const UChar* end ARG_UNUSED)
@ -666,6 +794,24 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
return n;
}
/* Generic multi-byte decoder: packs the bytes of the character at p into a
 * code point, most significant byte first.  Trailing bytes at or beyond
 * `end` are not read, so a truncated character yields a shorter value. */
extern OnigCodePoint
onigenc_mbn_mbc_to_code_se(OnigIterator* it, OnigEncoding enc, OnigPosition p, OnigPosition end)
{
  OnigCodePoint code;
  int remaining;

  remaining = enclen_se(it, enc, p) - 1;   /* bytes after the first one */
  code = (OnigCodePoint )(ONIG_CHARAT(p));
  p++;

  while (remaining > 0 && p < end) {
    code = (code << 8) + ONIG_CHARAT(p);
    p++;
    remaining--;
  }
  return code;
}
extern int
onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED,
@ -691,6 +837,32 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
}
}
/* Generic multi-byte case fold: an ASCII byte is lower-cased, any other
 * character is copied to `lower` unchanged.  *pp is advanced past the
 * character and the number of bytes written is returned. */
extern int
onigenc_mbn_mbc_case_fold_se(OnigIterator* it, OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
                             OnigPosition* pp, OnigPosition end ARG_UNUSED,
                             UChar* lower)
{
  const OnigPosition head = *pp;
  const UChar lead = ONIG_CHARAT(head);
  int len, k;

  if (ONIGENC_IS_MBC_ASCII_SE(lead)) {
    lower[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(lead);
    *pp = head + 1;
    return 1;
  }

  len = enclen_se(it, enc, head);
  for (k = 0; k < len; k++) {
    lower[k] = ONIG_CHARAT(head + k);
  }
  *pp = head + len;
  return len; /* return byte length of converted to lower char */
}
#if 0
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,

View File

@ -73,6 +73,7 @@ typedef struct {
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
#define enclen_se(it,enc,p) ONIGENC_MBC_ENC_LEN_SE(it,enc,p)
/* character types bit flag */
#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
@ -107,7 +108,7 @@ typedef struct {
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
#define USE_UNICODE_ALL_LINE_TERMINATORS /* see Unicode.org UTS #18 */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
@ -119,22 +120,29 @@ ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const Oni
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a_se P_((OnigIterator* it, OnigPosition p, OnigPosition end));
/* methods for single byte encoding */
ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_ascii_mbc_case_fold_se P_((OnigIterator* it, OnigCaseFoldType flag, OnigPosition* p, OnigPosition end, UChar* lower));
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len_se P_((OnigIterator* it, OnigPosition p));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code_se P_((OnigIterator* it, OnigPosition p, OnigPosition end));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
ONIG_EXTERN OnigPosition onigenc_single_byte_left_adjust_char_head_se P_((OnigIterator* it, OnigPosition start, OnigPosition s));
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
/* methods for multi byte encoding */
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code_se P_((OnigIterator* it, OnigEncoding enc, OnigPosition p, OnigPosition end));
ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_mbn_mbc_case_fold_se P_((OnigIterator* it, OnigEncoding enc, OnigCaseFoldType flag, OnigPosition* p, OnigPosition end, UChar* lower));
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
@ -146,11 +154,13 @@ ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint co
/* in enc/unicode.c */
ONIG_EXTERN const OnigCodePoint* onigenc_unicode_code_script P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
ONIG_EXTERN int onigenc_unicode_mbc_case_fold_se P_((OnigIterator* it, OnigEncoding enc, OnigCaseFoldType flag, OnigPosition* pp, OnigPosition end, UChar* fold));
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));

File diff suppressed because it is too large Load Diff

View File

@ -62,20 +62,20 @@ re_adjust_startpos(regex_t* reg, const char* string, int size,
}
extern int
re_match(regex_t* reg, const char* str, int size, int pos,
re_match(OnigIterator* it, regex_t* reg, OnigPosition str, OnigPosition size, OnigPosition pos,
struct re_registers* regs)
{
return (int )onig_match(reg, (UChar* )str, (UChar* )(str + size),
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
return (int )onig_match(it, reg, str, (str + size),
(str + pos), regs, ONIG_OPTION_NONE);
}
extern int
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
re_search(OnigIterator* it, regex_t* bufp, OnigPosition str, OnigPosition size, OnigPosition startpos, OnigPosition range,
struct re_registers* regs)
{
return (int )onig_search(bufp, (UChar* )string, (UChar* )(string + size),
(UChar* )(string + startpos),
(UChar* )(string + startpos + range),
return (int )onig_search(it, bufp, str, (str + size),
(str + startpos),
(str + startpos + range),
regs, ONIG_OPTION_NONE);
}

View File

@ -76,6 +76,8 @@
#define USE_SHARED_CCLASS_TABLE
#define USE_SUNDAY_QUICK_SEARCH
#define USE_SHARED_UNICODE_TABLE
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
@ -336,6 +338,7 @@ typedef unsigned int BitStatusType;
#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
#define IS_WORD_BOUND_ALL_RANGE(option) ((option) & ONIG_OPTION_WORD_BOUND_ALL_RANGE)
#define IS_NEWLINE_CRLF(option) ((option) & ONIG_OPTION_NEWLINE_CRLF)
#define IS_WHOLEWORD(option) ((option) & SE_ONIG_OPTION_WHOLEWORD)
/* OP_SET_OPTION is required for these options.
#define IS_DYNAMIC_OPTION(option) \
@ -743,19 +746,19 @@ typedef struct {
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
typedef intptr_t OnigStackIndex;
typedef OnigPosition OnigStackIndex;
typedef struct _OnigStackType {
unsigned int type;
union {
struct {
UChar *pcode; /* byte code position */
UChar *pstr; /* string position */
UChar *pstr_prev; /* previous char position of pstr */
OnigPosition pstr; /* string position */
OnigPosition pstr_prev; /* previous char position of pstr */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
unsigned int state_check;
#endif
UChar *pkeep; /* keep pattern position */
OnigPosition pkeep; /* keep pattern position */
} state;
struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
@ -767,20 +770,20 @@ typedef struct _OnigStackType {
} repeat_inc;
struct {
int num; /* memory num */
UChar *pstr; /* start/end position */
OnigPosition pstr; /* start/end position */
/* Following information is set, if this stack type is MEM-START */
OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
int num; /* null check id */
UChar *pstr; /* start position */
OnigPosition pstr; /* start position */
} null_check;
#ifdef USE_SUBEXP_CALL
struct {
UChar *ret_addr; /* byte code position */
int num; /* null check id */
UChar *pstr; /* string position */
OnigPosition pstr; /* string position */
} call_frame;
#endif
} u;
@ -791,11 +794,11 @@ typedef struct {
size_t stack_n;
OnigOptionType options;
OnigRegion* region;
const UChar* start; /* search start position */
const UChar* gpos; /* global position (for \G: BEGIN_POSITION) */
OnigPosition start; /* search start position */
OnigPosition gpos; /* global position (for \G: BEGIN_POSITION) */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
OnigPosition best_len; /* for ONIG_OPTION_FIND_LONGEST */
UChar* best_s;
OnigPosition best_s;
#endif
#ifdef USE_COMBINATION_EXPLOSION_CHECK
void* state_check_buff;

View File

@ -35,7 +35,6 @@
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
OnigSyntaxType OnigSyntaxRuby = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
@ -64,7 +63,7 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ( ONIG_OPTION_ASCII_RANGE | ONIG_OPTION_POSIX_BRACKET_ALL_RANGE |
, ( /*ONIG_OPTION_ASCII_RANGE |*/ ONIG_OPTION_POSIX_BRACKET_ALL_RANGE |
ONIG_OPTION_WORD_BOUND_ALL_RANGE )
,
{
@ -5341,6 +5340,51 @@ onig_free_shared_cclass_table(void)
#endif /* USE_SHARED_CCLASS_TABLE */
#ifdef USE_SHARED_UNICODE_TABLE
extern st_table* FoldTable; /* fold-1, fold-2, fold-3 */
extern st_table* Unfold1Table;
extern st_table* Unfold2Table;
extern st_table* Unfold3Table;
extern int CaseFoldInited;
/* Per-entry callback: frees the entry's key when it is non-NULL and returns
 * ST_DELETE.  `node` and `arg` are not used.
 * NOTE(review): no caller is visible in this hunk --
 * onig_free_shared_unicode_table() calls onig_st_free_table() directly.
 * Confirm whether this helper is still needed and whether the keys of the
 * unicode tables are actually heap-allocated before wiring it in. */
static int
i_free_shared_unicode_table(st_str_end_key* key, Node* node, void* arg ARG_UNUSED)
{
if (IS_NOT_NULL(key)) xfree(key);
return ST_DELETE;
}
extern int
onig_free_shared_unicode_table(void)
{
THREAD_ATOMIC_START;
if (IS_NOT_NULL(FoldTable)) {
onig_st_free_table(FoldTable);
FoldTable = NULL;
}
if (IS_NOT_NULL(Unfold1Table)) {
onig_st_free_table(Unfold1Table);
Unfold1Table = NULL;
}
if (IS_NOT_NULL(Unfold2Table)) {
onig_st_free_table(Unfold2Table);
Unfold2Table = NULL;
}
if (IS_NOT_NULL(Unfold3Table)) {
onig_st_free_table(Unfold3Table);
Unfold3Table = NULL;
}
CaseFoldInited = 0;
THREAD_ATOMIC_END;
return 0;
}
#endif // USE_SHARED_UNICODE_TABLE
#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
static int

View File

@ -347,6 +347,10 @@ extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
#ifdef USE_SHARED_UNICODE_TABLE
/* Frees the shared unicode case-fold tables. */
extern int onig_free_shared_unicode_table P_((void));
#endif /* USE_SHARED_UNICODE_TABLE */
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
extern int onig_print_names(FILE*, regex_t*);

View File

@ -27,7 +27,9 @@
* SUCH DAMAGE.
*/
#include "config.h"
#define regex_t onig_regex_t
#include "regint.h"
#undef regex_t
#include "onigposix.h"
#ifdef HAVE_STRING_H

View File

@ -48,6 +48,18 @@
} \
} while(0)
/* #define ENC_STRING_LEN_SE(enc,s,len) len = strlen(s) */
/*
 * ENC_STRING_LEN_SE(enc, s, len)
 *
 * Store in `len` the length of the NUL-terminated string that starts at
 * position `s`.
 *   - When the minimum character length of `enc` is 1, positions are scanned
 *     forward from `s` until ONIG_CHARAT() yields 0.
 *   - Otherwise the work is delegated to onigenc_str_bytelen_null_se().
 *
 * The expansion refers to a variable named `it`, which must be in scope at
 * the call site; it is passed to onigenc_str_bytelen_null_se().
 * NOTE(review): ONIG_CHARAT presumably reads through `it` as well -- confirm.
 *
 * `s` and `len` are parenthesized so that expression arguments (for example
 * `base + 1`) keep their meaning inside the subtraction and the assignment.
 * `s` may still be evaluated more than once; do not pass an expression with
 * side effects.
 */
#define ENC_STRING_LEN_SE(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    OnigPosition tmps = (s); \
    while (ONIG_CHARAT(tmps) != 0) tmps++; \
    (len) = tmps - (s); \
  } \
  else { \
    (len) = onigenc_str_bytelen_null_se(it, enc, (s)); \
  } \
} while(0)
typedef struct {
int onig_err;
int posix_err;
@ -163,12 +175,12 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
return 0;
}
extern int
regexec(regex_t* reg, const char* str, size_t nmatch,
extern OnigPosition
regexec(OnigIterator* it, regex_t* reg, OnigPosition str, size_t nmatch,
regmatch_t pmatch[], int posix_options)
{
int r, i, len;
UChar* end;
OnigPosition r, i, len;
OnigPosition end;
regmatch_t* pm;
OnigOptionType options;
@ -190,9 +202,9 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
pm = pmatch;
}
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
end = (UChar* )(str + len);
r = (int )onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
ENC_STRING_LEN_SE(ONIG_C(reg)->enc, str, len);
end = str + len;
r = onig_search(it, ONIG_C(reg), str, end, str, end,
(OnigRegion* )pm, options);
if (r >= 0) {
@ -207,7 +219,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
}
else {
r = onig2posix_error_code(r);
r = onig2posix_error_code((int)r);
}
if (pm != pmatch && pm != NULL)

View File

@ -46,6 +46,22 @@ OnigSyntaxType OnigSyntaxASIS = {
}
};
/*
 * Syntax definition for wildcard-style patterns.
 *
 * The only operator flag enabled is ONIG_SYN_OP_VARIABLE_META_CHARACTERS,
 * the next two fields are 0, and the option field is ONIG_OPTION_SINGLELINE.
 * The meta character table assigns:
 *   '\\'  escape
 *   '?'   any single character
 *   '*'   any character, any number of times
 * The "anytime", "zero or one time" and "one or more time" slots are set to
 * ONIG_INEFFECTIVE_META_CHAR.
 */
OnigSyntaxType OnigSyntaxWildChar = {
ONIG_SYN_OP_VARIABLE_META_CHARACTERS
, 0
, 0
, ONIG_OPTION_SINGLELINE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )'?' /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )'*' /* anychar anytime */
}
};
OnigSyntaxType OnigSyntaxPosixBasic = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_BRACE_INTERVAL )

View File

@ -9,12 +9,12 @@
#include <string.h>
#include "oniguruma.h"
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */
#define USE_UNICODE_ALL_LINE_TERMINATORS
static int nfail = 0;
static void result(int no, int from, int to,
int expected_from, int expected_to)
static void result(int no, OnigPosition from, OnigPosition to,
OnigPosition expected_from, OnigPosition expected_to)
{
fprintf(stderr, "%3d: ", no);
if (from == expected_from && to == expected_to) {
@ -22,7 +22,7 @@ static void result(int no, int from, int to,
}
else {
fprintf(stderr, "Fail: expected: (%d-%d), result: (%d-%d)\n",
expected_from, expected_to, from, to);
(int)expected_from, (int)expected_to, (int)from, (int)to);
nfail++;
}
@ -32,12 +32,13 @@ static int
x0(int no, char* pattern_arg, char* str_arg,
int start_offset, int expected_from, int expected_to, int backward)
{
int r;
unsigned char *start, *range, *end;
OnigPosition r;
OnigPosition start, range, end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
UChar *pattern, *str;
OnigIterator it = {onig_default_charat, str_arg};
pattern = (UChar* )pattern_arg;
str = (UChar* )str_arg;
@ -53,16 +54,16 @@ x0(int no, char* pattern_arg, char* str_arg,
region = onig_region_new();
end = str + strlen((char* )str);
end = strlen((char* )str);
if (backward) {
start = end + start_offset;
range = str;
range = 0;
}
else {
start = str + start_offset;
start = start_offset;
range = end;
}
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0 || r == ONIG_MISMATCH) {
result(no, region->beg[0], region->end[0], expected_from, expected_to);
}

View File

@ -5,24 +5,24 @@
#include "oniguruma.h"
static int
search(regex_t* reg, unsigned char* str, unsigned char* end)
search(OnigIterator* it, regex_t* reg, OnigPosition str, OnigPosition end)
{
int r;
unsigned char *start, *range;
OnigPosition r;
OnigPosition start, range;
OnigRegion *region;
region = onig_region_new();
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
r = onig_search(it, reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d (%s)\n", r,
fprintf(stderr, "match at %d (%s)\n", (int)r,
ONIGENC_NAME(onig_get_encoding(reg)));
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
fprintf(stderr, "%d: (%ld-%ld)\n", i, (int)region->beg[i], (int)region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {
@ -45,13 +45,15 @@ static int
exec(OnigEncoding enc, OnigOptionType options,
char* apattern, char* astr)
{
int r;
unsigned char *end;
OnigPosition r;
OnigPosition end;
regex_t* reg;
OnigErrorInfo einfo;
UChar* pattern = (UChar* )apattern;
UChar* str = (UChar* )astr;
OnigIterator it = {onig_default_charat, str};
onig_init();
r = onig_new(&reg, pattern,
pattern + onigenc_str_bytelen_null(enc, pattern),
options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
@ -62,8 +64,8 @@ exec(OnigEncoding enc, OnigOptionType options,
return -1;
}
end = str + onigenc_str_bytelen_null(enc, str);
r = search(reg, str, end);
end = onigenc_str_bytelen_null(enc, str);
r = search(&it, reg, 0, end);
onig_free(reg);
onig_end();
@ -84,13 +86,14 @@ static int
exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
OnigOptionType options, char* apattern, char* astr)
{
int r;
unsigned char *end;
OnigPosition r;
OnigPosition end;
regex_t* reg;
OnigCompileInfo ci;
OnigErrorInfo einfo;
UChar* pattern = (UChar* )apattern;
UChar* str = (UChar* )astr;
OnigIterator it = {onig_default_charat, str};
ci.num_of_elements = 5;
ci.pattern_enc = pattern_enc;
@ -109,8 +112,8 @@ exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
return -1;
}
end = str + onigenc_str_bytelen_null(str_enc, str);
r = search(reg, str, end);
end = onigenc_str_bytelen_null(str_enc, str);
r = search(&it, reg, 0, end);
onig_free(reg);
onig_end();

View File

@ -20,18 +20,19 @@ node_callback(int group, OnigPosition beg, OnigPosition end, int level,
for (i = 0; i < level * 2; i++)
fputc(' ', stderr);
fprintf(stderr, "%d: (%ld-%ld)\n", group, beg, end);
fprintf(stderr, "%d: (%ld-%ld)\n", group, (int)beg, (int)end);
return 0;
}
extern int ex(unsigned char* str, unsigned char* pattern,
OnigSyntaxType* syntax)
{
int r;
unsigned char *start, *range, *end;
OnigPosition r;
OnigPosition start, range, end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
OnigIterator it = {onig_default_charat, str};
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
@ -48,16 +49,16 @@ extern int ex(unsigned char* str, unsigned char* pattern,
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
end = strlen((char* )str);
start = 0;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
fprintf(stderr, "match at %d\n", (int)r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
fprintf(stderr, "%d: (%ld-%ld)\n", i, (long)region->beg[i], (long)region->end[i]);
}
fprintf(stderr, "\n");

View File

@ -19,21 +19,22 @@ name_callback(const UChar* name, const UChar* name_end,
ref = onig_name_to_backref_number(reg, name, name_end, region);
s = (ref == gn ? "*" : "");
fprintf(stderr, "%s (%d): ", name, gn);
fprintf(stderr, "(%ld-%ld) %s\n", region->beg[gn], region->end[gn], s);
fprintf(stderr, "(%ld-%ld) %s\n", (int)region->beg[gn], (int)region->end[gn], s);
}
return 0; /* 0: continue */
}
extern int main(int argc, char* argv[])
{
int r;
unsigned char *start, *range, *end;
OnigPosition r;
OnigPosition start, range, end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
static UChar* pattern = (UChar* )"(?<foo>a*)(?<bar>b*)(?<foo>c*)";
static UChar* str = (UChar* )"aaabbbbcc";
OnigIterator it = {onig_default_charat, str};
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
@ -48,10 +49,10 @@ extern int main(int argc, char* argv[])
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
end = strlen((char* )str);
start = 0;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
fprintf(stderr, "match at %d\n\n", r);
r = onig_foreach_name(reg, name_callback, (void* )region);

View File

@ -2,6 +2,10 @@
* posix.c
*/
#include <stdio.h>
#define regex_t onig_regex_t
#include "regint.h"
#undef regex_t
#include "onigposix.h"
typedef unsigned char UChar;
@ -11,8 +15,9 @@ static int x(regex_t* reg, unsigned char* pattern, unsigned char* str)
int r, i;
char buf[200];
regmatch_t pmatch[20];
OnigIterator it = {onig_default_charat, str};
r = regexec(reg, (char* )str, reg->re_nsub + 1, pmatch, 0);
r = (int)regexec(&it, reg, 0, reg->re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);

View File

@ -7,14 +7,15 @@
extern int main(int argc, char* argv[])
{
int r;
unsigned char *start, *range, *end;
OnigPosition r;
OnigPosition start, range, end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
static UChar* pattern = (UChar* )"a(.*)b|[e-f]+";
static UChar* str = (UChar* )"zzzzaffffffffb";
OnigIterator it = {onig_default_charat, str};
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
@ -27,16 +28,16 @@ extern int main(int argc, char* argv[])
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
end = strlen((char* )str);
start = 0;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
fprintf(stderr, "%d: (%ld-%ld)\n", i, (int)region->beg[i], (int)region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {

View File

@ -9,14 +9,15 @@ extern int main(int argc, char* argv[])
{
static OnigSyntaxType SQLSyntax;
int r;
unsigned char *start, *range, *end;
OnigPosition r;
OnigPosition start, range, end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
static UChar* pattern = (UChar* )"\\_%\\\\__zz";
static UChar* str = (UChar* )"a_abcabcabc\\ppzz";
OnigIterator it = {onig_default_charat, str};
onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS);
onig_set_syntax_op2 (&SQLSyntax, 0);
@ -44,16 +45,16 @@ extern int main(int argc, char* argv[])
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
end = strlen((char* )str);
start = 0;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
fprintf(stderr, "%d: (%ld-%ld)\n", i, (int)region->beg[i], (int)region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {

View File

@ -8,13 +8,14 @@
extern int exec(OnigSyntaxType* syntax,
char* apattern, char* astr)
{
int r;
unsigned char *start, *range, *end;
OnigPosition r;
OnigPosition start, range, end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
UChar* pattern = (UChar* )apattern;
UChar* str = (UChar* )astr;
OnigIterator it = {onig_default_charat, str};
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
@ -27,16 +28,16 @@ extern int exec(OnigSyntaxType* syntax,
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
end = strlen((char* )str);
start = 0;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
r = onig_search(&it, reg, 0, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
fprintf(stderr, "%d: (%ld-%ld)\n", i, (int)region->beg[i], (int)region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {

View File

@ -33,7 +33,7 @@ static OnigRegion* region;
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
int r;
OnigPosition r;
#ifdef POSIX_TEST
regex_t reg;
@ -89,6 +89,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
regex_t* reg;
OnigErrorInfo einfo;
OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT;
OnigIterator it = {onig_default_charat, str};
/* ONIG_OPTION_OFF(syn.options, ONIG_OPTION_ASCII_RANGE); */
@ -102,8 +103,8 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
return ;
}
r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
(UChar* )str, (UChar* )(str + SLEN(str)),
r = onig_search(&it, reg, 0, SLEN(str),
0, SLEN(str),
region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];

View File

@ -57,7 +57,7 @@ static void uconv(char* from, char* to, int len)
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
int r;
OnigPosition r;
char cpat[4000], cstr[4000];
#ifdef POSIX_TEST
@ -118,6 +118,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
OnigCompileInfo ci;
OnigErrorInfo einfo;
OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT;
OnigIterator it = {onig_default_charat, str};
/* ONIG_OPTION_OFF(syn.options, ONIG_OPTION_ASCII_RANGE); */
@ -148,8 +149,8 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
return ;
}
r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)),
(UChar* )str, (UChar* )(str + ulen(str)),
r = onig_search(&it, reg, 0, ulen(str),
0, ulen(str),
region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];

View File

@ -33,7 +33,7 @@ static OnigRegion* region;
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
int r;
OnigPosition r;
#ifdef POSIX_TEST
regex_t reg;
@ -89,6 +89,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
regex_t* reg;
OnigErrorInfo einfo;
OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT;
OnigIterator it = {onig_default_charat, str};
/* ONIG_OPTION_OFF(syn.options, ONIG_OPTION_ASCII_RANGE); */
@ -102,8 +103,8 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
return ;
}
r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
(UChar* )str, (UChar* )(str + SLEN(str)),
r = onig_search(&it, reg, 0, SLEN(str),
0, SLEN(str),
region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];

View File

@ -0,0 +1,7 @@
# Components
Below is a list of (some) WindTerm components in alphabetical order, along with a brief description of each.
## Onigmo
An improved version based on Onigmo 5.13.5. In particular, the addition of iterator makes it possible to match gap buffer or nonadjacent memory blocks. Please refer to the sample files for how to use.