東アジアの文字幅 (East Asian Width) の判定
Unicodeの文字が全角で表示されるか半角で表示されるかは東アジアの文字幅特性値がヒントを与えてくれるそうです。(日本語の場合は)この値がNa(狭)、N(中立)、H(半角)だと半角、W(広)、F(全角)、A(曖昧)だと全角として扱うことが推奨されているようです。
Pythonではunicodedataモジュールを使うとこの特性値を取得できますが、JavaScriptにはそのような関数は見当たりません。ですが、Unicode Consortiumが、どの文字がどの東アジアの文字幅を持つかのデータファイルを公開しているので、そこから判定用のコードを機械的に生成できるはずです。
で、以下が実際に生成したコードです。データファイルに、データファイルに出現しない文字はNとなるとあるので、以下ではN以外(F、H、W、Na、A)についてのみチェックを行い、それ以外をNと判定するようにしています。
コメントで指摘された通り、サロゲートペアの判定部分が間違っていたので修正。
function EastAsianWidth(character) { var x = character.charCodeAt(0); var y = (character.length == 2) ? character.charCodeAt(1) : 0; var codePoint = x; if ((0xD800 <= x && x <= 0xDBFF) && (0xDC00 <= y && y <= 0xDFFF)) { x &= 0x3FF; y &= 0x3FF; codePoint = (x << 10) | y; codePoint += 0x10000; } if ( (0x3000 == codePoint) || (0xFF01 <= codePoint && codePoint <= 0xFF60) || (0xFFE0 <= codePoint && codePoint <= 0xFFE6) ) { return 'F'; } if ( (0x20A9 == codePoint) || (0xFF61 <= codePoint && codePoint <= 0xFFBE) || (0xFFC2 <= codePoint && codePoint <= 0xFFC7) || (0xFFCA <= codePoint && codePoint <= 0xFFCF) || (0xFFD2 <= codePoint && codePoint <= 0xFFD7) || (0xFFDA <= codePoint && codePoint <= 0xFFDC) || (0xFFE8 <= codePoint && codePoint <= 0xFFEE) ) { return 'H'; } if ( (0x1100 <= codePoint && codePoint <= 0x115F) || (0x11A3 <= codePoint && codePoint <= 0x11A7) || (0x11FA <= codePoint && codePoint <= 0x11FF) || (0x2329 <= codePoint && codePoint <= 0x232A) || (0x2E80 <= codePoint && codePoint <= 0x2E99) || (0x2E9B <= codePoint && codePoint <= 0x2EF3) || (0x2F00 <= codePoint && codePoint <= 0x2FD5) || (0x2FF0 <= codePoint && codePoint <= 0x2FFB) || (0x3001 <= codePoint && codePoint <= 0x303E) || (0x3041 <= codePoint && codePoint <= 0x3096) || (0x3099 <= codePoint && codePoint <= 0x30FF) || (0x3105 <= codePoint && codePoint <= 0x312D) || (0x3131 <= codePoint && codePoint <= 0x318E) || (0x3190 <= codePoint && codePoint <= 0x31BA) || (0x31C0 <= codePoint && codePoint <= 0x31E3) || (0x31F0 <= codePoint && codePoint <= 0x321E) || (0x3220 <= codePoint && codePoint <= 0x3247) || (0x3250 <= codePoint && codePoint <= 0x32FE) || (0x3300 <= codePoint && codePoint <= 0x4DBF) || (0x4E00 <= codePoint && codePoint <= 0xA48C) || (0xA490 <= codePoint && codePoint <= 0xA4C6) || (0xA960 <= codePoint && codePoint <= 0xA97C) || (0xAC00 <= codePoint && codePoint <= 0xD7A3) || (0xD7B0 <= codePoint && codePoint <= 0xD7C6) || (0xD7CB <= codePoint && codePoint <= 0xD7FB) || (0xF900 <= codePoint && codePoint <= 0xFAFF) || (0xFE10 <= codePoint && codePoint <= 0xFE19) || (0xFE30 <= codePoint && codePoint <= 0xFE52) || (0xFE54 <= codePoint && codePoint <= 0xFE66) || (0xFE68 <= codePoint && codePoint <= 0xFE6B) || (0x1B000 <= codePoint && codePoint <= 0x1B001) || (0x1F200 <= codePoint && codePoint <= 0x1F202) || (0x1F210 <= codePoint && codePoint <= 0x1F23A) || (0x1F240 <= codePoint && codePoint <= 0x1F248) || (0x1F250 <= codePoint && codePoint <= 0x1F251) || (0x20000 <= codePoint && codePoint <= 0x2F73F) || (0x2B740 <= codePoint && codePoint <= 0x2FFFD) || (0x30000 <= codePoint && codePoint <= 0x3FFFD) ) { return 'W'; } if ( (0x0020 <= codePoint && codePoint <= 0x007E) || (0x00A2 <= codePoint && codePoint <= 0x00A3) || (0x00A5 <= codePoint && codePoint <= 0x00A6) || (0x00AC == codePoint) || (0x00AF == codePoint) || (0x27E6 <= codePoint && codePoint <= 0x27ED) || (0x2985 <= codePoint && codePoint <= 0x2986) ) { return 'Na'; } if ( (0x00A1 == codePoint) || (0x00A4 == codePoint) || (0x00A7 <= codePoint && codePoint <= 0x00A8) || (0x00AA == codePoint) || (0x00AD <= codePoint && codePoint <= 0x00AE) || (0x00B0 <= codePoint && codePoint <= 0x00B4) || (0x00B6 <= codePoint && codePoint <= 0x00BA) || (0x00BC <= codePoint && codePoint <= 0x00BF) || (0x00C6 == codePoint) || (0x00D0 == codePoint) || (0x00D7 <= codePoint && codePoint <= 0x00D8) || (0x00DE <= codePoint && codePoint <= 0x00E1) || (0x00E6 == codePoint) || (0x00E8 <= codePoint && codePoint <= 0x00EA) || (0x00EC <= codePoint && codePoint <= 0x00ED) || (0x00F0 == codePoint) || (0x00F2 <= codePoint && codePoint <= 0x00F3) || (0x00F7 <= codePoint && codePoint <= 0x00FA) || (0x00FC == codePoint) || (0x00FE == codePoint) || (0x0101 == codePoint) || (0x0111 == codePoint) || (0x0113 == codePoint) || (0x011B == codePoint) || (0x0126 <= codePoint && codePoint <= 0x0127) || (0x012B == codePoint) || (0x0131 <= codePoint && codePoint <= 0x0133) || (0x0138 == codePoint) || (0x013F <= codePoint && codePoint <= 0x0142) || (0x0144 == codePoint) || (0x0148 <= codePoint && codePoint <= 0x014B) || (0x014D == codePoint) || (0x0152 <= codePoint && codePoint <= 0x0153) || (0x0166 <= codePoint && codePoint <= 0x0167) || (0x016B == codePoint) || (0x01CE == codePoint) || (0x01D0 == codePoint) || (0x01D2 == codePoint) || (0x01D4 == codePoint) || (0x01D6 == codePoint) || (0x01D8 == codePoint) || (0x01DA == codePoint) || (0x01DC == codePoint) || (0x0251 == codePoint) || (0x0261 == codePoint) || (0x02C4 == codePoint) || (0x02C7 == codePoint) || (0x02C9 <= codePoint && codePoint <= 0x02CB) || (0x02CD == codePoint) || (0x02D0 == codePoint) || (0x02D8 <= codePoint && codePoint <= 0x02DB) || (0x02DD == codePoint) || (0x02DF == codePoint) || (0x0300 <= codePoint && codePoint <= 0x036F) || (0x0391 <= codePoint && codePoint <= 0x03A1) || (0x03A3 <= codePoint && codePoint <= 0x03A9) || (0x03B1 <= codePoint && codePoint <= 0x03C1) || (0x03C3 <= codePoint && codePoint <= 0x03C9) || (0x0401 == codePoint) || (0x0410 <= codePoint && codePoint <= 0x044F) || (0x0451 == codePoint) || (0x2010 == codePoint) || (0x2013 <= codePoint && codePoint <= 0x2016) || (0x2018 <= codePoint && codePoint <= 0x2019) || (0x201C <= codePoint && codePoint <= 0x201D) || (0x2020 <= codePoint && codePoint <= 0x2022) || (0x2024 <= codePoint && codePoint <= 0x2027) || (0x2030 == codePoint) || (0x2032 <= codePoint && codePoint <= 0x2033) || (0x2035 == codePoint) || (0x203B == codePoint) || (0x203E == codePoint) || (0x2074 == codePoint) || (0x207F == codePoint) || (0x2081 <= codePoint && codePoint <= 0x2084) || (0x20AC == codePoint) || (0x2103 == codePoint) || (0x2105 == codePoint) || (0x2109 == codePoint) || (0x2113 == codePoint) || (0x2116 == codePoint) || (0x2121 <= codePoint && codePoint <= 0x2122) || (0x2126 == codePoint) || (0x212B == codePoint) || (0x2153 <= codePoint && codePoint <= 0x2154) || (0x215B <= codePoint && codePoint <= 0x215E) || (0x2160 <= codePoint && codePoint <= 0x216B) || (0x2170 <= codePoint && codePoint <= 0x2179) || (0x2189 == codePoint) || (0x2190 <= codePoint && codePoint <= 0x2199) || (0x21B8 <= codePoint && codePoint <= 0x21B9) || (0x21D2 == codePoint) || (0x21D4 == codePoint) || (0x21E7 == codePoint) || (0x2200 == codePoint) || (0x2202 <= codePoint && codePoint <= 0x2203) || (0x2207 <= codePoint && codePoint <= 0x2208) || (0x220B == codePoint) || (0x220F == codePoint) || (0x2211 == codePoint) || (0x2215 == codePoint) || (0x221A == codePoint) || (0x221D <= codePoint && codePoint <= 0x2220) || (0x2223 == codePoint) || (0x2225 == codePoint) || (0x2227 <= codePoint && codePoint <= 0x222C) || (0x222E == codePoint) || (0x2234 <= codePoint && codePoint <= 0x2237) || (0x223C <= codePoint && codePoint <= 0x223D) || (0x2248 == codePoint) || (0x224C == codePoint) || (0x2252 == codePoint) || (0x2260 <= codePoint && codePoint <= 0x2261) || (0x2264 <= codePoint && codePoint <= 0x2267) || (0x226A <= codePoint && codePoint <= 0x226B) || (0x226E <= codePoint && codePoint <= 0x226F) || (0x2282 <= codePoint && codePoint <= 0x2283) || (0x2286 <= codePoint && codePoint <= 0x2287) || (0x2295 == codePoint) || (0x2299 == codePoint) || (0x22A5 == codePoint) || (0x22BF == codePoint) || (0x2312 == codePoint) || (0x2460 <= codePoint && codePoint <= 0x24E9) || (0x24EB <= codePoint && codePoint <= 0x254B) || (0x2550 <= codePoint && codePoint <= 0x2573) || (0x2580 <= codePoint && codePoint <= 0x258F) || (0x2592 <= codePoint && codePoint <= 0x2595) || (0x25A0 <= codePoint && codePoint <= 0x25A1) || (0x25A3 <= codePoint && codePoint <= 0x25A9) || (0x25B2 <= codePoint && codePoint <= 0x25B3) || (0x25B6 <= codePoint && codePoint <= 0x25B7) || (0x25BC <= codePoint && codePoint <= 0x25BD) || (0x25C0 <= codePoint && codePoint <= 0x25C1) || (0x25C6 <= codePoint && codePoint <= 0x25C8) || (0x25CB == codePoint) || (0x25CE <= codePoint && codePoint <= 0x25D1) || (0x25E2 <= codePoint && codePoint <= 0x25E5) || (0x25EF == codePoint) || (0x2605 <= codePoint && codePoint <= 0x2606) || (0x2609 == codePoint) || (0x260E <= codePoint && codePoint <= 0x260F) || (0x2614 <= codePoint && codePoint <= 0x2615) || (0x261C == codePoint) || (0x261E == codePoint) || (0x2640 == codePoint) || (0x2642 == codePoint) || (0x2660 <= codePoint && codePoint <= 0x2661) || (0x2663 <= codePoint && codePoint <= 0x2665) || (0x2667 <= codePoint && codePoint <= 0x266A) || (0x266C <= codePoint && codePoint <= 0x266D) || (0x266F == codePoint) || (0x269E <= codePoint && codePoint <= 0x269F) || (0x26BE <= codePoint && codePoint <= 0x26BF) || (0x26C4 <= codePoint && codePoint <= 0x26CD) || (0x26CF <= codePoint && codePoint <= 0x26E1) || (0x26E3 == codePoint) || (0x26E8 <= codePoint && codePoint <= 0x26FF) || (0x273D == codePoint) || (0x2757 == codePoint) || (0x2776 <= codePoint && codePoint <= 0x277F) || (0x2B55 <= codePoint && codePoint <= 0x2B59) || (0x3248 <= codePoint && codePoint <= 0x324F) || (0xE000 <= codePoint && codePoint <= 0xF8FF) || (0xFE00 <= codePoint && codePoint <= 0xFE0F) || (0xFFFD == codePoint) || (0x1F100 <= codePoint && codePoint <= 0x1F10A) || (0x1F110 <= codePoint && codePoint <= 0x1F12D) || (0x1F130 <= codePoint && codePoint <= 0x1F169) || (0x1F170 <= codePoint && codePoint <= 0x1F19A) || (0xE0100 <= codePoint && codePoint <= 0xE01EF) || (0xF0000 <= codePoint && codePoint <= 0xFFFFD) || (0x100000 <= codePoint && codePoint <= 0x10FFFD) ) { return 'A'; } return 'N'; }