mirror of
https://github.com/mamoe/mirai.git
synced 2025-01-07 16:40:43 +08:00
Add HtmlEntity
for decoding HTML entities
This commit is contained in:
parent
b423430b74
commit
5b170ab63a
264
mirai-core-utils/src/commonMain/kotlin/HtmlEntity.kt
Normal file
264
mirai-core-utils/src/commonMain/kotlin/HtmlEntity.kt
Normal file
@ -0,0 +1,264 @@
|
||||
/*
|
||||
* Copyright 2019-2021 Mamoe Technologies and contributors.
|
||||
*
|
||||
* 此源代码的使用受 GNU AFFERO GENERAL PUBLIC LICENSE version 3 许可证的约束, 可以在以下链接找到该许可证.
|
||||
* Use of this source code is governed by the GNU AGPLv3 license that can be found through the following link.
|
||||
*
|
||||
* https://github.com/mamoe/mirai/blob/dev/LICENSE
|
||||
*/
|
||||
|
||||
package net.mamoe.mirai.utils
|
||||
|
||||
/**
 * Matches a single HTML character reference: `&`, an optional `#`, one or more ASCII letters
 * or digits, then `;`.
 *
 * Group 1 captures the text between `&` and `;`, including the leading `#` of a numeric reference.
 *
 * `&`, `#` and `;` are not regex metacharacters, so they are written without a backslash.
 * Redundant escapes such as `\&` are accepted by `java.util.regex` but are rejected by stricter
 * engines (for example ECMAScript regular expressions in unicode mode), which matters for code
 * placed in a common source set.
 */
private val STR_TO_CHAR_PATTERN = """&(#?[A-Za-z0-9]+?);""".toRegex()
|
||||
|
||||
/**
 * Decodes the HTML character references contained in this string.
 *
 * Every sequence matched by [STR_TO_CHAR_PATTERN] is resolved in this order:
 * 1. if the whole sequence is a key of [STR_TO_CHAR_MAPPINGS], it is replaced by the mapped text;
 * 2. otherwise, if it is a numeric reference (`&#DDD;` decimal, `&#xHHH;` or `&#XHHH;` hexadecimal)
 *    naming a code point in `0..0x10FFFF`, it is replaced by that code point;
 * 3. otherwise it is kept exactly as written.
 *
 * Text that is not matched by the pattern is copied unchanged.
 *
 * @return a new string with all recognized references replaced.
 */
public fun String.decodeHtmlEscape(): String = replace(STR_TO_CHAR_PATTERN) { match ->
    STR_TO_CHAR_MAPPINGS[match.value]
        ?: decodeNumericHtmlEntity(match.groupValues[1])
        ?: match.value
}

/**
 * Decodes the body of a numeric character reference, i.e. the text between `&` and `;`.
 *
 * @param reference the reference body, e.g. `#65`, `#x41` or `#X41`.
 * @return the decoded text, or `null` when [reference] is not a numeric reference, its digits
 * cannot be parsed as an `Int`, or the value is not a Unicode code point.
 */
private fun decodeNumericHtmlEntity(reference: String): String? {
    if (reference.length < 2 || reference[0] != '#') return null
    // HTML accepts both `x` and `X` as the hexadecimal marker.
    val isHex = reference.length > 2 && (reference[1] == 'x' || reference[1] == 'X')
    val codePoint = if (isHex) {
        reference.substring(2).toIntOrNull(16)
    } else {
        reference.substring(1).toIntOrNull()
    }
    return codePoint?.let(::htmlCodePointToString)
}

/**
 * Converts a Unicode code point to its UTF-16 string form.
 *
 * @return a one-char string for `0..0xFFFF`, a surrogate pair for `0x10000..0x10FFFF`,
 * or `null` for any other value.
 */
private fun htmlCodePointToString(codePoint: Int): String? = when (codePoint) {
    // Values in the surrogate range are emitted as a single char, unchanged from the previous behavior.
    in 0..0xFFFF -> codePoint.toChar().toString()
    in 0x10000..0x10FFFF -> {
        // Int.toChar() would keep only the low 16 bits, so build the surrogate pair by hand.
        val offset = codePoint - 0x10000
        val high = (0xD800 + (offset shr 10)).toChar()
        val low = (0xDC00 + (offset and 0x3FF)).toChar()
        "$high$low"
    }
    else -> null
}
|
||||
|
||||
|
||||
/**
 * Lookup table from a complete named character reference (including the leading `&` and the
 * trailing `;`, e.g. `&amp;`) to the text it stands for.
 *
 * The table is built on first access. It holds 223 entries: `&amp;`, `&lt;`, `&gt;` and the
 * HTML 4 Latin-1, Greek, punctuation, letterlike, arrow, mathematical, technical and
 * card-suit entity names.
 *
 * NOTE(review): `&quot;` is not part of this table, so it is left undecoded unless written
 * numerically - confirm whether that is intentional.
 */
private val STR_TO_CHAR_MAPPINGS: Map<String, String> by lazy {
    //<editor-fold defaultstate="collapsed" desc="Generated Code">
    val result = HashMap<String, String>(223)
    result["&amp;"] = "\u0026"
    result["&lt;"] = "\u003c"
    result["&gt;"] = "\u003e"
    result["&nbsp;"] = "\u00a0"
    result["&iexcl;"] = "\u00a1"
    result["&cent;"] = "\u00a2"
    result["&pound;"] = "\u00a3"
    result["&curren;"] = "\u00a4"
    result["&yen;"] = "\u00a5"
    result["&brvbar;"] = "\u00a6"
    result["&sect;"] = "\u00a7"
    result["&uml;"] = "\u00a8"
    result["&copy;"] = "\u00a9"
    result["&ordf;"] = "\u00aa"
    result["&laquo;"] = "\u00ab"
    result["&not;"] = "\u00ac"
    result["&shy;"] = "\u00ad"
    result["&reg;"] = "\u00ae"
    result["&macr;"] = "\u00af"
    result["&deg;"] = "\u00b0"
    result["&plusmn;"] = "\u00b1"
    result["&sup2;"] = "\u00b2"
    result["&sup3;"] = "\u00b3"
    result["&acute;"] = "\u00b4"
    result["&micro;"] = "\u00b5"
    result["&para;"] = "\u00b6"
    result["&middot;"] = "\u00b7"
    result["&cedil;"] = "\u00b8"
    result["&sup1;"] = "\u00b9"
    result["&ordm;"] = "\u00ba"
    result["&raquo;"] = "\u00bb"
    result["&frac14;"] = "\u00bc"
    result["&frac12;"] = "\u00bd"
    result["&frac34;"] = "\u00be"
    result["&iquest;"] = "\u00bf"
    result["&Agrave;"] = "\u00c0"
    result["&Aacute;"] = "\u00c1"
    result["&Acirc;"] = "\u00c2"
    result["&Atilde;"] = "\u00c3"
    result["&Auml;"] = "\u00c4"
    result["&Aring;"] = "\u00c5"
    result["&AElig;"] = "\u00c6"
    result["&Ccedil;"] = "\u00c7"
    result["&Egrave;"] = "\u00c8"
    result["&Eacute;"] = "\u00c9"
    result["&Ecirc;"] = "\u00ca"
    result["&Euml;"] = "\u00cb"
    result["&Igrave;"] = "\u00cc"
    result["&Iacute;"] = "\u00cd"
    result["&Icirc;"] = "\u00ce"
    result["&Iuml;"] = "\u00cf"
    result["&ETH;"] = "\u00d0"
    result["&Ntilde;"] = "\u00d1"
    result["&Ograve;"] = "\u00d2"
    result["&Oacute;"] = "\u00d3"
    result["&Ocirc;"] = "\u00d4"
    result["&Otilde;"] = "\u00d5"
    result["&Ouml;"] = "\u00d6"
    result["&times;"] = "\u00d7"
    result["&Oslash;"] = "\u00d8"
    result["&Ugrave;"] = "\u00d9"
    result["&Uacute;"] = "\u00da"
    result["&Ucirc;"] = "\u00db"
    result["&Uuml;"] = "\u00dc"
    result["&Yacute;"] = "\u00dd"
    result["&THORN;"] = "\u00de"
    result["&szlig;"] = "\u00df"
    result["&agrave;"] = "\u00e0"
    result["&aacute;"] = "\u00e1"
    result["&acirc;"] = "\u00e2"
    result["&atilde;"] = "\u00e3"
    result["&auml;"] = "\u00e4"
    result["&aring;"] = "\u00e5"
    result["&aelig;"] = "\u00e6"
    result["&ccedil;"] = "\u00e7"
    result["&egrave;"] = "\u00e8"
    result["&eacute;"] = "\u00e9"
    result["&ecirc;"] = "\u00ea"
    result["&euml;"] = "\u00eb"
    result["&igrave;"] = "\u00ec"
    result["&iacute;"] = "\u00ed"
    result["&icirc;"] = "\u00ee"
    result["&iuml;"] = "\u00ef"
    result["&eth;"] = "\u00f0"
    result["&ntilde;"] = "\u00f1"
    result["&ograve;"] = "\u00f2"
    result["&oacute;"] = "\u00f3"
    result["&ocirc;"] = "\u00f4"
    result["&otilde;"] = "\u00f5"
    result["&ouml;"] = "\u00f6"
    result["&divide;"] = "\u00f7"
    result["&oslash;"] = "\u00f8"
    result["&ugrave;"] = "\u00f9"
    result["&uacute;"] = "\u00fa"
    result["&ucirc;"] = "\u00fb"
    result["&uuml;"] = "\u00fc"
    result["&yacute;"] = "\u00fd"
    result["&thorn;"] = "\u00fe"
    result["&yuml;"] = "\u00ff"
    result["&fnof;"] = "\u0192"
    result["&Alpha;"] = "\u0391"
    result["&Beta;"] = "\u0392"
    result["&Gamma;"] = "\u0393"
    result["&Delta;"] = "\u0394"
    result["&Epsilon;"] = "\u0395"
    result["&Zeta;"] = "\u0396"
    result["&Eta;"] = "\u0397"
    result["&Theta;"] = "\u0398"
    result["&Iota;"] = "\u0399"
    result["&Kappa;"] = "\u039a"
    result["&Lambda;"] = "\u039b"
    result["&Mu;"] = "\u039c"
    result["&Nu;"] = "\u039d"
    result["&Xi;"] = "\u039e"
    result["&Omicron;"] = "\u039f"
    result["&Pi;"] = "\u03a0"
    result["&Rho;"] = "\u03a1"
    result["&Sigma;"] = "\u03a3"
    result["&Tau;"] = "\u03a4"
    result["&Upsilon;"] = "\u03a5"
    result["&Phi;"] = "\u03a6"
    result["&Chi;"] = "\u03a7"
    result["&Psi;"] = "\u03a8"
    result["&Omega;"] = "\u03a9"
    result["&alpha;"] = "\u03b1"
    result["&beta;"] = "\u03b2"
    result["&gamma;"] = "\u03b3"
    result["&delta;"] = "\u03b4"
    result["&epsilon;"] = "\u03b5"
    result["&zeta;"] = "\u03b6"
    result["&eta;"] = "\u03b7"
    result["&theta;"] = "\u03b8"
    result["&iota;"] = "\u03b9"
    result["&kappa;"] = "\u03ba"
    result["&lambda;"] = "\u03bb"
    result["&mu;"] = "\u03bc"
    result["&nu;"] = "\u03bd"
    result["&xi;"] = "\u03be"
    result["&omicron;"] = "\u03bf"
    result["&pi;"] = "\u03c0"
    result["&rho;"] = "\u03c1"
    result["&sigmaf;"] = "\u03c2"
    result["&sigma;"] = "\u03c3"
    result["&tau;"] = "\u03c4"
    result["&upsilon;"] = "\u03c5"
    result["&phi;"] = "\u03c6"
    result["&chi;"] = "\u03c7"
    result["&psi;"] = "\u03c8"
    result["&omega;"] = "\u03c9"
    result["&thetasym;"] = "\u03d1"
    result["&upsih;"] = "\u03d2"
    result["&piv;"] = "\u03d6"
    result["&bull;"] = "\u2022"
    result["&hellip;"] = "\u2026"
    result["&prime;"] = "\u2032"
    result["&Prime;"] = "\u2033"
    result["&oline;"] = "\u203e"
    result["&frasl;"] = "\u2044"
    result["&weierp;"] = "\u2118"
    result["&image;"] = "\u2111"
    result["&real;"] = "\u211c"
    result["&trade;"] = "\u2122"
    result["&alefsym;"] = "\u2135"
    result["&larr;"] = "\u2190"
    result["&uarr;"] = "\u2191"
    result["&rarr;"] = "\u2192"
    result["&darr;"] = "\u2193"
    result["&harr;"] = "\u2194"
    result["&crarr;"] = "\u21b5"
    result["&lArr;"] = "\u21d0"
    result["&uArr;"] = "\u21d1"
    result["&rArr;"] = "\u21d2"
    result["&dArr;"] = "\u21d3"
    result["&hArr;"] = "\u21d4"
    result["&forall;"] = "\u2200"
    result["&part;"] = "\u2202"
    result["&exist;"] = "\u2203"
    result["&empty;"] = "\u2205"
    result["&nabla;"] = "\u2207"
    result["&isin;"] = "\u2208"
    result["&notin;"] = "\u2209"
    result["&ni;"] = "\u220b"
    result["&prod;"] = "\u220f"
    result["&sum;"] = "\u2211"
    result["&minus;"] = "\u2212"
    result["&lowast;"] = "\u2217"
    result["&radic;"] = "\u221a"
    result["&prop;"] = "\u221d"
    result["&infin;"] = "\u221e"
    result["&ang;"] = "\u2220"
    result["&and;"] = "\u2227"
    result["&or;"] = "\u2228"
    result["&cap;"] = "\u2229"
    result["&cup;"] = "\u222a"
    result["&int;"] = "\u222b"
    result["&there4;"] = "\u2234"
    result["&sim;"] = "\u223c"
    result["&cong;"] = "\u2245"
    result["&asymp;"] = "\u2248"
    result["&ne;"] = "\u2260"
    result["&equiv;"] = "\u2261"
    result["&le;"] = "\u2264"
    result["&ge;"] = "\u2265"
    result["&sub;"] = "\u2282"
    result["&sup;"] = "\u2283"
    result["&nsub;"] = "\u2284"
    result["&sube;"] = "\u2286"
    result["&supe;"] = "\u2287"
    result["&oplus;"] = "\u2295"
    result["&otimes;"] = "\u2297"
    result["&perp;"] = "\u22a5"
    result["&sdot;"] = "\u22c5"
    result["&lceil;"] = "\u2308"
    result["&rceil;"] = "\u2309"
    result["&lfloor;"] = "\u230a"
    result["&rfloor;"] = "\u230b"
    result["&lang;"] = "\u2329"
    result["&rang;"] = "\u232a"
    result["&loz;"] = "\u25ca"
    result["&spades;"] = "\u2660"
    result["&clubs;"] = "\u2663"
    result["&hearts;"] = "\u2665"
    result["&diams;"] = "\u2666"
    //</editor-fold>
    result
}
|
||||
|
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright 2019-2021 Mamoe Technologies and contributors.
|
||||
*
|
||||
* 此源代码的使用受 GNU AFFERO GENERAL PUBLIC LICENSE version 3 许可证的约束, 可以在以下链接找到该许可证.
|
||||
* Use of this source code is governed by the GNU AGPLv3 license that can be found through the following link.
|
||||
*
|
||||
* https://github.com/mamoe/mirai/blob/dev/LICENSE
|
||||
*/
|
||||
|
||||
package net.mamoe.mirai.utils
|
||||
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
|
||||
/**
 * Tests for [decodeHtmlEscape].
 */
internal class HtmlEscapeTest {
    /**
     * Checks named, decimal and hexadecimal character references, references embedded in plain
     * text, and input that must be returned untouched.
     */
    @Test
    fun testDecode() {
        // Each pair is (encoded input, expected decoded output).
        // Inputs are written with real `&name;` references so the decoder is actually exercised.
        val cases = listOf(
            // Named references, sampled from every group of the mapping table.
            "&amp;&lt;&gt;" to "\u0026\u003c\u003e",
            "&nbsp;&copy;&reg;&frac12;&shy;" to "\u00a0\u00a9\u00ae\u00bd\u00ad",
            "&Iuml;&Icirc;&szlig;&yuml;&THORN;&thorn;" to "\u00cf\u00ce\u00df\u00ff\u00de\u00fe",
            "&fnof;&Alpha;&Omega;&alpha;&sigmaf;&omega;&piv;" to "\u0192\u0391\u03a9\u03b1\u03c2\u03c9\u03d6",
            "&bull;&hellip;&prime;&Prime;&trade;&alefsym;" to "\u2022\u2026\u2032\u2033\u2122\u2135",
            "&larr;&rArr;&hArr;&crarr;" to "\u2190\u21d2\u21d4\u21b5",
            "&forall;&prod;&there4;&sdot;" to "\u2200\u220f\u2234\u22c5",
            "&lceil;&rfloor;&lang;&rang;&loz;&spades;&diams;" to "\u2308\u230b\u2329\u232a\u25ca\u2660\u2666",
            // Numeric references: decimal and lowercase-x hexadecimal.
            "&#92;&#10;&#x5c;&#xa;&#x4e2d;" to "\u005c\u000a\u005c\u000a\u4e2d",
            // References surrounded by ordinary text.
            "a &lt; b &amp;&amp; c &gt; d" to "a < b && c > d",
            // Unknown names and malformed references are kept as written.
            "&unknown; &#; &#x; &#xZZ; & ; &amp" to "&unknown; &#; &#x; &#xZZ; & ; &amp",
            "plain text" to "plain text",
            "" to ""
        )
        for ((encoded, expected) in cases) {
            assertEquals(expected, encoded.decodeHtmlEscape(), "decoding $encoded")
        }
    }
}
|
Loading…
Reference in New Issue
Block a user