TinySegmenter.jl を Javascript へ逆移植してみた

昨日、途中で時間切れとなった、

の再チャレンジです。


話の中身

Javascript で書かれた小さな日本語分かち書きソフトウェア

として、

と言うのが有って、さらに、それを高速化しつつ他の言語に移植された一例として、

と言う実装が有るのですが、今回、これの最適化手法を参考にしつつ、 最新の Javascript へ逆移植したモノを作ってみました。

成果物

/* TinySegmenter.optmized.js
* =========================
*
*  * A reverse port to modern JavaScript of the optimized TinySegmenter implementation written in Julia.
*
* LICENSE
* =======
*  (c) 2008 Taku Kudo                    <taku@chasen.org>
*  (c) 2015 Michiaki Ariga a.k.a chezou  <chezou@gmail.com>
*  (c) 2017 Naoki OKAMURA  a.k.a nyarla  <nyarla@thotep.net>
* 
*  All rights reserved.
*
*  Redistribution and use in source and binary forms, with or without
*  modification, are permitted provided that the following conditions are met:
*
*      * Redistributions of source code must retain the above copyright notice,
*  this list of conditions and the following disclaimer.
*      * Redistributions in binary form must reproduce the above copyright
*  notice, this list of conditions and the following disclaimer in the
*  documentation and/or other materials provided with the distribution.
*      * Neither the name of the <ORGANIZATION> nor the names of its
*  contributors may be used to endorse or promote products derived from this
*  software without specific prior written permission.
*
*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
*  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
*  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* */

"use strict";

// Constant term: tokenize() initialises every boundary score to this value
// before adding the feature weights.
var BIAS = -332;

// BC1: weights keyed by the character-class pair c2 + c3 in tokenize().
// Map requires an iterable of [key, value] entries.
var BC1 = new Map([
  ["HH", 6],
  ["II", 2461],
  ["KH", 406],
  ["OH", -1378]
]);

// BC2: weights keyed by the character-class pair c3 + c4 in tokenize().
// Map requires an iterable of [key, value] entries.
var BC2 = new Map([
  ["AA", -3267],
  ["AI", 2744],
  ["AN", -878],
  ["HH", -4070],
  ["HM", -1711],
  ["HN", 4012],
  ["HO", 3761],
  ["IA", 1327],
  ["IH", -1184],
  ["II", -1332],
  ["IK", 1721],
  ["IO", 5492],
  ["KI", 3831],
  ["KK", -8741],
  ["MH", -3132],
  ["MK", 3334],
  ["OO", -2920]
]);

// BC3: weights keyed by the character-class pair c4 + c5 in tokenize().
// Map requires an iterable of [key, value] entries.
var BC3 = new Map([
  ["HH", 996],
  ["HI", 626],
  ["HK", -721],
  ["HN", -1307],
  ["HO", -836],
  ["IH", -301],
  ["KK", 2762],
  ["MK", 1079],
  ["MM", 4034],
  ["OA", -1652],
  ["OH", 266]
]);

// BP1: weights keyed by the previous-decision pair p1 + p2 in tokenize().
// Map requires an iterable of [key, value] entries.
var BP1 = new Map([
  ["BB", 295],
  ["OB", 304],
  ["OO", -125],
  ["UB", 352]
]);

// BP2: weights keyed by the previous-decision pair p2 + p3 in tokenize().
// Map requires an iterable of [key, value] entries.
var BP2 = new Map([
  ["BO", 60],
  ["OO", -1762]
]);

// BQ1: weights keyed by p2 + c2 + c3 (previous decision plus two character
// classes) in tokenize(). Map requires an iterable of [key, value] entries.
var BQ1 = new Map([
  ["BHH", 1150],
  ["BHM", 1521],
  ["BII", -1158],
  ["BIM", 886],
  ["BMH", 1208],
  ["BNH", 449],
  ["BOH", -91],
  ["BOO", -2597],
  ["OHI", 451],
  ["OIH", -296],
  ["OKA", 1851],
  ["OKH", -1020],
  ["OKK", 904],
  ["OOO", 2965]
]);

// BQ2: weights keyed by p2 + c3 + c4 in tokenize().
// Map requires an iterable of [key, value] entries.
var BQ2 = new Map([
  ["BHH", 118],
  ["BHI", -1159],
  ["BHM", 466],
  ["BIH", -919],
  ["BKK", -1720],
  ["BKO", 864],
  ["OHH", -1139],
  ["OHM", -181],
  ["OIH", 153],
  ["UHI", -1146]
]);

// BQ3: weights keyed by p3 + c2 + c3 in tokenize().
// Map requires an iterable of [key, value] entries.
var BQ3 = new Map([
  ["BHH", -792],
  ["BHI", 2664],
  ["BII", -299],
  ["BKI", 419],
  ["BMH", 937],
  ["BMM", 8335],
  ["BNN", 998],
  ["BOH", 775],
  ["OHH", 2174],
  ["OHM", 439],
  ["OII", 280],
  ["OKH", 1798],
  ["OKI", -793],
  ["OKO", -2242],
  ["OMH", -2402],
  ["OOO", 11699]
]);

// BQ4: weights keyed by p3 + c3 + c4 in tokenize().
// Map requires an iterable of [key, value] entries.
var BQ4 = new Map([
  ["BHH", -3895],
  ["BIH", 3761],
  ["BII", -4654],
  ["BIK", 1348],
  ["BKK", -1806],
  ["BMI", -3385],
  ["BOO", -12396],
  ["OAH", 926],
  ["OHH", 266],
  ["OHK", -2036],
  ["ONN", -973]
]);

// BW1: weights keyed by the character pair w2 + w3 in tokenize().
// Map requires an iterable of [key, value] entries.
// NOTE(review): the last three entries repeat keys listed above with the same
// values (harmless in a Map); presumably they were width variants of those
// keys in the upstream table — confirm against upstream.
var BW1 = new Map([
  [",と", 660],
  [",同", 727],
  ["B1あ", 1404],
  ["B1同", 542],
  ["、と", 660],
  ["、同", 727],
  ["」と", 1682],
  ["あっ", 1505],
  ["いう", 1743],
  ["いっ", -2055],
  ["いる", 672],
  ["うし", -4817],
  ["うん", 665],
  ["から", 3472],
  ["がら", 600],
  ["こう", -790],
  ["こと", 2083],
  ["こん", -1262],
  ["さら", -4143],
  ["さん", 4573],
  ["した", 2641],
  ["して", 1104],
  ["すで", -3399],
  ["そこ", 1977],
  ["それ", -871],
  ["たち", 1122],
  ["ため", 601],
  ["った", 3463],
  ["つい", -802],
  ["てい", 805],
  ["てき", 1249],
  ["でき", 1127],
  ["です", 3445],
  ["では", 844],
  ["とい", -4915],
  ["とみ", 1922],
  ["どこ", 3887],
  ["ない", 5713],
  ["なっ", 3015],
  ["など", 7379],
  ["なん", -1113],
  ["にし", 2468],
  ["には", 1498],
  ["にも", 1671],
  ["に対", -912],
  ["の一", -501],
  ["の中", 741],
  ["ませ", 2448],
  ["まで", 1711],
  ["まま", 2600],
  ["まる", -2155],
  ["やむ", -1947],
  ["よっ", -2565],
  ["れた", 2369],
  ["れで", -913],
  ["をし", 1860],
  ["を見", 731],
  ["亡く", -1886],
  ["京都", 2558],
  ["取り", -2784],
  ["大き", -2604],
  ["大阪", 1497],
  ["平方", -2314],
  ["引き", -1336],
  ["日本", -195],
  ["本当", -2423],
  ["毎日", -2113],
  ["目指", -724],
  ["B1あ", 1404],
  ["B1同", 542],
  ["」と", 1682]
]);

// BW2: weights keyed by the character pair w3 + w4 in tokenize().
// Map requires an iterable of [key, value] entries.
// NOTE(review): the final "11" entry repeats a key listed above with the same
// value (harmless in a Map); presumably it was a width variant in the
// upstream table — confirm against upstream.
var BW2 = new Map([
  ["..", -11822],
  ["11", -669],
  ["――", -5730],
  ["−−", -13175],
  ["いう", -1609],
  ["うか", 2490],
  ["かし", -1350],
  ["かも", -602],
  ["から", -7194],
  ["かれ", 4612],
  ["がい", 853],
  ["がら", -3198],
  ["きた", 1941],
  ["くな", -1597],
  ["こと", -8392],
  ["この", -4193],
  ["させ", 4533],
  ["され", 13168],
  ["さん", -3977],
  ["しい", -1819],
  ["しか", -545],
  ["した", 5078],
  ["して", 972],
  ["しな", 939],
  ["その", -3744],
  ["たい", -1253],
  ["たた", -662],
  ["ただ", -3857],
  ["たち", -786],
  ["たと", 1224],
  ["たは", -939],
  ["った", 4589],
  ["って", 1647],
  ["っと", -2094],
  ["てい", 6144],
  ["てき", 3640],
  ["てく", 2551],
  ["ては", -3110],
  ["ても", -3065],
  ["でい", 2666],
  ["でき", -1528],
  ["でし", -3828],
  ["です", -4761],
  ["でも", -4203],
  ["とい", 1890],
  ["とこ", -1746],
  ["とと", -2279],
  ["との", 720],
  ["とみ", 5168],
  ["とも", -3941],
  ["ない", -2488],
  ["なが", -1313],
  ["など", -6509],
  ["なの", 2614],
  ["なん", 3099],
  ["にお", -1615],
  ["にし", 2748],
  ["にな", 2454],
  ["によ", -7236],
  ["に対", -14943],
  ["に従", -4688],
  ["に関", -11388],
  ["のか", 2093],
  ["ので", -7059],
  ["のに", -6041],
  ["のの", -6125],
  ["はい", 1073],
  ["はが", -1033],
  ["はず", -2532],
  ["ばれ", 1813],
  ["まし", -1316],
  ["まで", -6621],
  ["まれ", 5409],
  ["めて", -3153],
  ["もい", 2230],
  ["もの", -10713],
  ["らか", -944],
  ["らし", -1611],
  ["らに", -1897],
  ["りし", 651],
  ["りま", 1620],
  ["れた", 4270],
  ["れて", 849],
  ["れば", 4114],
  ["ろう", 6067],
  ["われ", 7901],
  ["を通", -11877],
  ["んだ", 728],
  ["んな", -4115],
  ["一人", 602],
  ["一方", -1375],
  ["一日", 970],
  ["一部", -1051],
  ["上が", -4479],
  ["会社", -1116],
  ["出て", 2163],
  ["分の", -7758],
  ["同党", 970],
  ["同日", -913],
  ["大阪", -2471],
  ["委員", -1250],
  ["少な", -1050],
  ["年度", -8669],
  ["年間", -1626],
  ["府県", -2363],
  ["手権", -1982],
  ["新聞", -4066],
  ["日新", -722],
  ["日本", -7068],
  ["日米", 3372],
  ["曜日", -601],
  ["朝鮮", -2355],
  ["本人", -2697],
  ["東京", -1543],
  ["然と", -1384],
  ["社会", -1276],
  ["立て", -990],
  ["第に", -1612],
  ["米国", -4268],
  ["11", -669]
]);

// BW3: weights keyed by the character pair w4 + w5 in tokenize().
// Map requires an iterable of [key, value] entries.
// NOTE(review): the final "カ月" entry repeats a key listed above with the
// same value (harmless in a Map); presumably it was a width variant in the
// upstream table — confirm against upstream.
var BW3 = new Map([
  ["あた", -2194],
  ["あり", 719],
  ["ある", 3846],
  ["い.", -1185],
  ["い。", -1185],
  ["いい", 5308],
  ["いえ", 2079],
  ["いく", 3029],
  ["いた", 2056],
  ["いっ", 1883],
  ["いる", 5600],
  ["いわ", 1527],
  ["うち", 1117],
  ["うと", 4798],
  ["えと", 1454],
  ["か.", 2857],
  ["か。", 2857],
  ["かけ", -743],
  ["かっ", -4098],
  ["かに", -669],
  ["から", 6520],
  ["かり", -2670],
  ["が,", 1816],
  ["が、", 1816],
  ["がき", -4855],
  ["がけ", -1127],
  ["がっ", -913],
  ["がら", -4977],
  ["がり", -2064],
  ["きた", 1645],
  ["けど", 1374],
  ["こと", 7397],
  ["この", 1542],
  ["ころ", -2757],
  ["さい", -714],
  ["さを", 976],
  ["し,", 1557],
  ["し、", 1557],
  ["しい", -3714],
  ["した", 3562],
  ["して", 1449],
  ["しな", 2608],
  ["しま", 1200],
  ["す.", -1310],
  ["す。", -1310],
  ["する", 6521],
  ["ず,", 3426],
  ["ず、", 3426],
  ["ずに", 841],
  ["そう", 428],
  ["た.", 8875],
  ["た。", 8875],
  ["たい", -594],
  ["たの", 812],
  ["たり", -1183],
  ["たる", -853],
  ["だ.", 4098],
  ["だ。", 4098],
  ["だっ", 1004],
  ["った", -4748],
  ["って", 300],
  ["てい", 6240],
  ["てお", 855],
  ["ても", 302],
  ["です", 1437],
  ["でに", -1482],
  ["では", 2295],
  ["とう", -1387],
  ["とし", 2266],
  ["との", 541],
  ["とも", -3543],
  ["どう", 4664],
  ["ない", 1796],
  ["なく", -903],
  ["など", 2135],
  ["に,", -1021],
  ["に、", -1021],
  ["にし", 1771],
  ["にな", 1906],
  ["には", 2644],
  ["の,", -724],
  ["の、", -724],
  ["の子", -1000],
  ["は,", 1337],
  ["は、", 1337],
  ["べき", 2181],
  ["まし", 1113],
  ["ます", 6943],
  ["まっ", -1549],
  ["まで", 6154],
  ["まれ", -793],
  ["らし", 1479],
  ["られ", 6820],
  ["るる", 3818],
  ["れ,", 854],
  ["れ、", 854],
  ["れた", 1850],
  ["れて", 1375],
  ["れば", -3246],
  ["れる", 1091],
  ["われ", -605],
  ["んだ", 606],
  ["んで", 798],
  ["カ月", 990],
  ["会議", 860],
  ["入り", 1232],
  ["大会", 2217],
  ["始め", 1681],
  ["市", 965],
  ["新聞", -5055],
  ["日,", 974],
  ["日、", 974],
  ["社会", 2024],
  ["カ月", 990]
]);

// TC1: weights keyed by the character-class triple c1 + c2 + c3 in tokenize().
// Map requires an iterable of [key, value] entries.
var TC1 = new Map([
  ["AAA", 1093],
  ["HHH", 1029],
  ["HHM", 580],
  ["HII", 998],
  ["HOH", -390],
  ["HOM", -331],
  ["IHI", 1169],
  ["IOH", -142],
  ["IOI", -1015],
  ["IOM", 467],
  ["MMH", 187],
  ["OOI", -1832]
]);

// TC2: weights keyed by the character-class triple c2 + c3 + c4 in tokenize().
// Map requires an iterable of [key, value] entries.
var TC2 = new Map([
  ["HHO", 2088],
  ["HII", -1023],
  ["HMM", -1154],
  ["IHI", -1965],
  ["KKH", 703],
  ["OII", -2649]
]);

// TC3: weights keyed by the character-class triple c3 + c4 + c5 in tokenize().
// Map requires an iterable of [key, value] entries.
var TC3 = new Map([
  ["AAA", -294],
  ["HHH", 346],
  ["HHI", -341],
  ["HII", -1088],
  ["HIK", 731],
  ["HOH", -1486],
  ["IHH", 128],
  ["IHI", -3041],
  ["IHO", -1935],
  ["IIH", -825],
  ["IIM", -1035],
  ["IOI", -542],
  ["KHH", -1216],
  ["KKA", 491],
  ["KKH", -1217],
  ["KOK", -1009],
  ["MHH", -2694],
  ["MHM", -457],
  ["MHO", 123],
  ["MMH", -471],
  ["NNH", -1689],
  ["NNO", 662],
  ["OHO", -3393]
]);

// TC4: weights keyed by the character-class triple c4 + c5 + c6 in tokenize().
// Map requires an iterable of [key, value] entries.
var TC4 = new Map([
  ["HHH", -203],
  ["HHI", 1344],
  ["HHK", 365],
  ["HHM", -122],
  ["HHN", 182],
  ["HHO", 669],
  ["HIH", 804],
  ["HII", 679],
  ["HOH", 446],
  ["IHH", 695],
  ["IHO", -2324],
  ["IIH", 321],
  ["III", 1497],
  ["IIO", 656],
  ["IOO", 54],
  ["KAK", 4845],
  ["KKA", 3386],
  ["KKK", 3065],
  ["MHH", -405],
  ["MHI", 201],
  ["MMH", -241],
  ["MMM", 661],
  ["MOM", 841]
]);

// TQ1: weights keyed by p2 + c1 + c2 + c3 in tokenize().
// Map requires an iterable of [key, value] entries.
var TQ1 = new Map([
  ["BHHH", -227],
  ["BHHI", 316],
  ["BHIH", -132],
  ["BIHH", 60],
  ["BIII", 1595],
  ["BNHH", -744],
  ["BOHH", 225],
  ["BOOO", -908],
  ["OAKK", 482],
  ["OHHH", 281],
  ["OHIH", 249],
  ["OIHI", 200],
  ["OIIH", -68]
]);

// TQ2: weights keyed by p2 + c2 + c3 + c4 in tokenize().
// Map requires an iterable of [key, value] entries.
var TQ2 = new Map([
  ["BIHH", -1401],
  ["BIII", -1033],
  ["BKAK", -543],
  ["BOOO", -5591]
]);

// TQ3: weights keyed by p3 + c1 + c2 + c3 in tokenize().
// Map requires an iterable of [key, value] entries.
var TQ3 = new Map([
  ["BHHH", 478],
  ["BHHM", -1073],
  ["BHIH", 222],
  ["BHII", -504],
  ["BIIH", -116],
  ["BIII", -105],
  ["BMHI", -863],
  ["BMHM", -464],
  ["BOMH", 620],
  ["OHHH", 346],
  ["OHHI", 1729],
  ["OHII", 997],
  ["OHMH", 481],
  ["OIHH", 623],
  ["OIIH", 1344],
  ["OKAK", 2792],
  ["OKHH", 587],
  ["OKKA", 679],
  ["OOHH", 110],
  ["OOII", -685]
]);

// TQ4: weights keyed by p3 + c2 + c3 + c4 in tokenize().
// Map requires an iterable of [key, value] entries.
var TQ4 = new Map([
  ["BHHH", -721],
  ["BHHM", -3604],
  ["BHII", -966],
  ["BIIH", -607],
  ["BIII", -2181],
  ["OAAA", -2763],
  ["OAKK", 180],
  ["OHHH", -294],
  ["OHHI", 2446],
  ["OHHO", 480],
  ["OHIH", -1573],
  ["OIHH", 1935],
  ["OIHI", -493],
  ["OIIH", 626],
  ["OIII", -4007],
  ["OKAK", -8156]
]);

// TW1: weights keyed by the character triple w1 + w2 + w3 in tokenize().
// Map requires an iterable of [key, value] entries.
var TW1 = new Map([
  ["につい", -4681],
  ["東京都", 2026]
]);

// TW2: weights keyed by the character triple w2 + w3 + w4 in tokenize().
// Map requires an iterable of [key, value] entries.
var TW2 = new Map([
  ["ある程", -2049],
  ["いった", -1256],
  ["ころが", -2434],
  ["しょう", 3873],
  ["その後", -4430],
  ["だって", -1049],
  ["ていた", 1833],
  ["として", -4657],
  ["ともに", -4517],
  ["もので", 1882],
  ["一気に", -792],
  ["初めて", -1512],
  ["同時に", -8097],
  ["大きな", -1255],
  ["対して", -2721],
  ["社会党", -3216]
]);

// TW3: weights keyed by the character triple w3 + w4 + w5 in tokenize().
// Map requires an iterable of [key, value] entries.
var TW3 = new Map([
  ["いただ", -1734],
  ["してい", 1314],
  ["として", -4314],
  ["につい", -5483],
  ["にとっ", -5989],
  ["に当た", -6247],
  ["ので,", -727],
  ["ので、", -727],
  ["のもの", -600],
  ["れから", -3752],
  ["十二月", -2287]
]);

// TW4: weights keyed by the character triple w4 + w5 + w6 in tokenize().
// Map requires an iterable of [key, value] entries.
var TW4 = new Map([
  ["いう.", 8576],
  ["いう。", 8576],
  ["からな", -2348],
  ["してい", 2958],
  ["たが,", 1516],
  ["たが、", 1516],
  ["ている", 1538],
  ["という", 1349],
  ["ました", 5543],
  ["ません", 1097],
  ["ようと", -4258],
  ["よると", 5865]
]);

// UC1: weights keyed by the character class c1 in tokenize().
// Map requires an iterable of [key, value] entries.
var UC1 = new Map([
  ["A", 484],
  ["K", 93],
  ["M", 645],
  ["O", -505]
]);

// UC2: weights keyed by the character class c2 in tokenize().
// Map requires an iterable of [key, value] entries.
var UC2 = new Map([
  ["A", 819],
  ["H", 1059],
  ["I", 409],
  ["M", 3987],
  ["N", 5775],
  ["O", 646]
]);

// UC3: weights for the character class c3. tokenize() does not read this map;
// it hard-codes the same two values in an if/else chain.
// Map requires an iterable of [key, value] entries.
var UC3 = new Map([
  ["A", -1370],
  ["I", 2311]
]);

// UC4: weights keyed by the character class c4 in tokenize().
// Map requires an iterable of [key, value] entries.
var UC4 = new Map([
  ["A", -2643],
  ["H", 1809],
  ["I", -1032],
  ["K", -3450],
  ["M", 3565],
  ["N", 3876],
  ["O", 6646]
]);

// UC5: weights keyed by the character class c5 in tokenize().
// Map requires an iterable of [key, value] entries.
var UC5 = new Map([
  ["H", 313],
  ["I", -1238],
  ["K", -799],
  ["M", 539],
  ["O", -831]
]);

// UC6: weights keyed by the character class c6 in tokenize().
// Map requires an iterable of [key, value] entries.
var UC6 = new Map([
  ["H", -506],
  ["I", -253],
  ["K", 87],
  ["M", 247],
  ["O", -387]
]);

// UP1: weight for the previous decision p1. tokenize() does not read this
// map; it hard-codes the same value in an if statement.
// Map requires an iterable of [key, value] entries.
var UP1 = new Map([
  ["O", -214]
]);

// UP2: weights for the previous decision p2. tokenize() does not read this
// map; it hard-codes the same values in an if/else chain.
// Map requires an iterable of [key, value] entries.
var UP2 = new Map([
  ["B", 69],
  ["O", 935]
]);

// UP3: weight for the previous decision p3. tokenize() does not read this
// map; it hard-codes the same value in an if statement.
// Map requires an iterable of [key, value] entries.
var UP3 = new Map([
  ["B", 189]
]);

// UQ1: weights keyed by p1 + c1 (previous decision plus character class) in
// tokenize(). Map requires an iterable of [key, value] entries.
var UQ1 = new Map([
  ["BH", 21],
  ["BI", -12],
  ["BK", -99],
  ["BN", 142],
  ["BO", -56],
  ["OH", -95],
  ["OI", 477],
  ["OK", 410],
  ["OO", -2422]
]);

// UQ2: weights keyed by p2 + c2 in tokenize().
// Map requires an iterable of [key, value] entries.
var UQ2 = new Map([
  ["BH", 216],
  ["BI", 113],
  ["OK", 1759]
]);

// UQ3: weights keyed by p3 + c3 in tokenize().
// Map requires an iterable of [key, value] entries.
var UQ3 = new Map([
  ["BA", -479],
  ["BH", 42],
  ["BI", 1913],
  ["BK", -7198],
  ["BM", 3160],
  ["BN", 6427],
  ["BO", 14761],
  ["OI", -827],
  ["ON", -3212]
]);

// UW1: weights keyed by the single character w1 in tokenize().
// Map requires an iterable of [key, value] entries.
// NOTE(review): the last two entries repeat keys listed above with the same
// values (harmless in a Map); presumably they were width variants in the
// upstream table — confirm against upstream.
var UW1 = new Map([
  [",", 156],
  ["、", 156],
  ["「", -463],
  ["あ", -941],
  ["う", -127],
  ["が", -553],
  ["き", 121],
  ["こ", 505],
  ["で", -201],
  ["と", -547],
  ["ど", -123],
  ["に", -789],
  ["の", -185],
  ["は", -847],
  ["も", -466],
  ["や", -470],
  ["よ", 182],
  ["ら", -292],
  ["り", 208],
  ["れ", 169],
  ["を", -446],
  ["ん", -137],
  ["・", -135],
  ["主", -402],
  ["京", -268],
  ["区", -912],
  ["午", 871],
  ["国", -460],
  ["大", 561],
  ["委", 729],
  ["市", -411],
  ["日", -141],
  ["理", 361],
  ["生", -408],
  ["県", -386],
  ["都", -718],
  ["「", -463],
  ["・", -135]
]);

// UW2: weights keyed by the single character w2 in tokenize().
// Map requires an iterable of [key, value] entries.
// NOTE(review): the last six entries repeat keys listed above with the same
// values (harmless in a Map); presumably they were width variants in the
// upstream table — confirm against upstream.
var UW2 = new Map([
  [",", -829],
  ["、", -829],
  ["〇", 892],
  ["「", -645],
  ["」", 3145],
  ["あ", -538],
  ["い", 505],
  ["う", 134],
  ["お", -502],
  ["か", 1454],
  ["が", -856],
  ["く", -412],
  ["こ", 1141],
  ["さ", 878],
  ["ざ", 540],
  ["し", 1529],
  ["す", -675],
  ["せ", 300],
  ["そ", -1011],
  ["た", 188],
  ["だ", 1837],
  ["つ", -949],
  ["て", -291],
  ["で", -268],
  ["と", -981],
  ["ど", 1273],
  ["な", 1063],
  ["に", -1764],
  ["の", 130],
  ["は", -409],
  ["ひ", -1273],
  ["べ", 1261],
  ["ま", 600],
  ["も", -1263],
  ["や", -402],
  ["よ", 1639],
  ["り", -579],
  ["る", -694],
  ["れ", 571],
  ["を", -2516],
  ["ん", 2095],
  ["ア", -587],
  ["カ", 306],
  ["キ", 568],
  ["ッ", 831],
  ["三", -758],
  ["不", -2150],
  ["世", -302],
  ["中", -968],
  ["主", -861],
  ["事", 492],
  ["人", -123],
  ["会", 978],
  ["保", 362],
  ["入", 548],
  ["初", -3025],
  ["副", -1566],
  ["北", -3414],
  ["区", -422],
  ["大", -1769],
  ["天", -865],
  ["太", -483],
  ["子", -1519],
  ["学", 760],
  ["実", 1023],
  ["小", -2009],
  ["市", -813],
  ["年", -1060],
  ["強", 1067],
  ["手", -1519],
  ["揺", -1033],
  ["政", 1522],
  ["文", -1355],
  ["新", -1682],
  ["日", -1815],
  ["明", -1462],
  ["最", -630],
  ["朝", -1843],
  ["本", -1650],
  ["東", -931],
  ["果", -665],
  ["次", -2378],
  ["民", -180],
  ["気", -1740],
  ["理", 752],
  ["発", 529],
  ["目", -1584],
  ["相", -242],
  ["県", -1165],
  ["立", -763],
  ["第", 810],
  ["米", 509],
  ["自", -1353],
  ["行", 838],
  ["西", -744],
  ["見", -3874],
  ["調", 1010],
  ["議", 1198],
  ["込", 3041],
  ["開", 1758],
  ["間", -1257],
  ["「", -645],
  ["」", 3145],
  ["ッ", 831],
  ["ア", -587],
  ["カ", 306],
  ["キ", 568]
]);

// UW3: weights keyed by the single character w3 in tokenize().
// Map requires an iterable of [key, value] entries.
// NOTE(review): the last twelve entries repeat keys listed above with the
// same values (harmless in a Map); presumably they were width variants in the
// upstream table — confirm against upstream.
var UW3 = new Map([
  [",", 4889],
  ["1", -800],
  ["−", -1723],
  ["、", 4889],
  ["々", -2311],
  ["〇", 5827],
  ["」", 2670],
  ["〓", -3573],
  ["あ", -2696],
  ["い", 1006],
  ["う", 2342],
  ["え", 1983],
  ["お", -4864],
  ["か", -1163],
  ["が", 3271],
  ["く", 1004],
  ["け", 388],
  ["げ", 401],
  ["こ", -3552],
  ["ご", -3116],
  ["さ", -1058],
  ["し", -395],
  ["す", 584],
  ["せ", 3685],
  ["そ", -5228],
  ["た", 842],
  ["ち", -521],
  ["っ", -1444],
  ["つ", -1081],
  ["て", 6167],
  ["で", 2318],
  ["と", 1691],
  ["ど", -899],
  ["な", -2788],
  ["に", 2745],
  ["の", 4056],
  ["は", 4555],
  ["ひ", -2171],
  ["ふ", -1798],
  ["へ", 1199],
  ["ほ", -5516],
  ["ま", -4384],
  ["み", -120],
  ["め", 1205],
  ["も", 2323],
  ["や", -788],
  ["よ", -202],
  ["ら", 727],
  ["り", 649],
  ["る", 5905],
  ["れ", 2773],
  ["わ", -1207],
  ["を", 6620],
  ["ん", -518],
  ["ア", 551],
  ["グ", 1319],
  ["ス", 874],
  ["ッ", -1350],
  ["ト", 521],
  ["ム", 1109],
  ["ル", 1591],
  ["ロ", 2201],
  ["ン", 278],
  ["・", -3794],
  ["一", -1619],
  ["下", -1759],
  ["世", -2087],
  ["両", 3815],
  ["中", 653],
  ["主", -758],
  ["予", -1193],
  ["二", 974],
  ["人", 2742],
  ["今", 792],
  ["他", 1889],
  ["以", -1368],
  ["低", 811],
  ["何", 4265],
  ["作", -361],
  ["保", -2439],
  ["元", 4858],
  ["党", 3593],
  ["全", 1574],
  ["公", -3030],
  ["六", 755],
  ["共", -1880],
  ["円", 5807],
  ["再", 3095],
  ["分", 457],
  ["初", 2475],
  ["別", 1129],
  ["前", 2286],
  ["副", 4437],
  ["力", 365],
  ["動", -949],
  ["務", -1872],
  ["化", 1327],
  ["北", -1038],
  ["区", 4646],
  ["千", -2309],
  ["午", -783],
  ["協", -1006],
  ["口", 483],
  ["右", 1233],
  ["各", 3588],
  ["合", -241],
  ["同", 3906],
  ["和", -837],
  ["員", 4513],
  ["国", 642],
  ["型", 1389],
  ["場", 1219],
  ["外", -241],
  ["妻", 2016],
  ["学", -1356],
  ["安", -423],
  ["実", -1008],
  ["家", 1078],
  ["小", -513],
  ["少", -3102],
  ["州", 1155],
  ["市", 3197],
  ["平", -1804],
  ["年", 2416],
  ["広", -1030],
  ["府", 1605],
  ["度", 1452],
  ["建", -2352],
  ["当", -3885],
  ["得", 1905],
  ["思", -1291],
  ["性", 1822],
  ["戸", -488],
  ["指", -3973],
  ["政", -2013],
  ["教", -1479],
  ["数", 3222],
  ["文", -1489],
  ["新", 1764],
  ["日", 2099],
  ["旧", 5792],
  ["昨", -661],
  ["時", -1248],
  ["曜", -951],
  ["最", -937],
  ["月", 4125],
  ["期", 360],
  ["李", 3094],
  ["村", 364],
  ["東", -805],
  ["核", 5156],
  ["森", 2438],
  ["業", 484],
  ["氏", 2613],
  ["民", -1694],
  ["決", -1073],
  ["法", 1868],
  ["海", -495],
  ["無", 979],
  ["物", 461],
  ["特", -3850],
  ["生", -273],
  ["用", 914],
  ["町", 1215],
  ["的", 7313],
  ["直", -1835],
  ["省", 792],
  ["県", 6293],
  ["知", -1528],
  ["私", 4231],
  ["税", 401],
  ["立", -960],
  ["第", 1201],
  ["米", 7767],
  ["系", 3066],
  ["約", 3663],
  ["級", 1384],
  ["統", -4229],
  ["総", 1163],
  ["線", 1255],
  ["者", 6457],
  ["能", 725],
  ["自", -2869],
  ["英", 785],
  ["見", 1044],
  ["調", -562],
  ["財", -733],
  ["費", 1777],
  ["車", 1835],
  ["軍", 1375],
  ["込", -1504],
  ["通", -1136],
  ["選", -681],
  ["郎", 1026],
  ["郡", 4404],
  ["部", 1200],
  ["金", 2163],
  ["長", 421],
  ["開", -1432],
  ["間", 1302],
  ["関", -1282],
  ["雨", 2009],
  ["電", -1045],
  ["非", 2066],
  ["駅", 1620],
  ["1", -800],
  ["」", 2670],
  ["・", -3794],
  ["ッ", -1350],
  ["ア", 551],
  ["グ", 1319],
  ["ス", 874],
  ["ト", 521],
  ["ム", 1109],
  ["ル", 1591],
  ["ロ", 2201],
  ["ン", 278]
]);

// UW4: weights keyed by the single character w4 in tokenize().
// Map requires an iterable of [key, value] entries.
// NOTE(review): the last fourteen entries repeat keys listed above with the
// same values (harmless in a Map); presumably they were width variants in the
// upstream table — confirm against upstream.
var UW4 = new Map([
  [",", 3930],
  [".", 3508],
  ["―", -4841],
  ["、", 3930],
  ["。", 3508],
  ["〇", 4999],
  ["「", 1895],
  ["」", 3798],
  ["〓", -5156],
  ["あ", 4752],
  ["い", -3435],
  ["う", -640],
  ["え", -2514],
  ["お", 2405],
  ["か", 530],
  ["が", 6006],
  ["き", -4482],
  ["ぎ", -3821],
  ["く", -3788],
  ["け", -4376],
  ["げ", -4734],
  ["こ", 2255],
  ["ご", 1979],
  ["さ", 2864],
  ["し", -843],
  ["じ", -2506],
  ["す", -731],
  ["ず", 1251],
  ["せ", 181],
  ["そ", 4091],
  ["た", 5034],
  ["だ", 5408],
  ["ち", -3654],
  ["っ", -5882],
  ["つ", -1659],
  ["て", 3994],
  ["で", 7410],
  ["と", 4547],
  ["な", 5433],
  ["に", 6499],
  ["ぬ", 1853],
  ["ね", 1413],
  ["の", 7396],
  ["は", 8578],
  ["ば", 1940],
  ["ひ", 4249],
  ["び", -4134],
  ["ふ", 1345],
  ["へ", 6665],
  ["べ", -744],
  ["ほ", 1464],
  ["ま", 1051],
  ["み", -2082],
  ["む", -882],
  ["め", -5046],
  ["も", 4169],
  ["ゃ", -2666],
  ["や", 2795],
  ["ょ", -1544],
  ["よ", 3351],
  ["ら", -2922],
  ["り", -9726],
  ["る", -14896],
  ["れ", -2613],
  ["ろ", -4570],
  ["わ", -1783],
  ["を", 13150],
  ["ん", -2352],
  ["カ", 2145],
  ["コ", 1789],
  ["セ", 1287],
  ["ッ", -724],
  ["ト", -403],
  ["メ", -1635],
  ["ラ", -881],
  ["リ", -541],
  ["ル", -856],
  ["ン", -3637],
  ["・", -4371],
  ["ー", -11870],
  ["一", -2069],
  ["中", 2210],
  ["予", 782],
  ["事", -190],
  ["井", -1768],
  ["人", 1036],
  ["以", 544],
  ["会", 950],
  ["体", -1286],
  ["作", 530],
  ["側", 4292],
  ["先", 601],
  ["党", -2006],
  ["共", -1212],
  ["内", 584],
  ["円", 788],
  ["初", 1347],
  ["前", 1623],
  ["副", 3879],
  ["力", -302],
  ["動", -740],
  ["務", -2715],
  ["化", 776],
  ["区", 4517],
  ["協", 1013],
  ["参", 1555],
  ["合", -1834],
  ["和", -681],
  ["員", -910],
  ["器", -851],
  ["回", 1500],
  ["国", -619],
  ["園", -1200],
  ["地", 866],
  ["場", -1410],
  ["塁", -2094],
  ["士", -1413],
  ["多", 1067],
  ["大", 571],
  ["子", -4802],
  ["学", -1397],
  ["定", -1057],
  ["寺", -809],
  ["小", 1910],
  ["屋", -1328],
  ["山", -1500],
  ["島", -2056],
  ["川", -2667],
  ["市", 2771],
  ["年", 374],
  ["庁", -4556],
  ["後", 456],
  ["性", 553],
  ["感", 916],
  ["所", -1566],
  ["支", 856],
  ["改", 787],
  ["政", 2182],
  ["教", 704],
  ["文", 522],
  ["方", -856],
  ["日", 1798],
  ["時", 1829],
  ["最", 845],
  ["月", -9066],
  ["木", -485],
  ["来", -442],
  ["校", -360],
  ["業", -1043],
  ["氏", 5388],
  ["民", -2716],
  ["気", -910],
  ["沢", -939],
  ["済", -543],
  ["物", -735],
  ["率", 672],
  ["球", -1267],
  ["生", -1286],
  ["産", -1101],
  ["田", -2900],
  ["町", 1826],
  ["的", 2586],
  ["目", 922],
  ["省", -3485],
  ["県", 2997],
  ["空", -867],
  ["立", -2112],
  ["第", 788],
  ["米", 2937],
  ["系", 786],
  ["約", 2171],
  ["経", 1146],
  ["統", -1169],
  ["総", 940],
  ["線", -994],
  ["署", 749],
  ["者", 2145],
  ["能", -730],
  ["般", -852],
  ["行", -792],
  ["規", 792],
  ["警", -1184],
  ["議", -244],
  ["谷", -1000],
  ["賞", 730],
  ["車", -1481],
  ["軍", 1158],
  ["輪", -1433],
  ["込", -3370],
  ["近", 929],
  ["道", -1291],
  ["選", 2596],
  ["郎", -4866],
  ["都", 1192],
  ["野", -1100],
  ["銀", -2213],
  ["長", 357],
  ["間", -2344],
  ["院", -2297],
  ["際", -2604],
  ["電", -878],
  ["領", -1659],
  ["題", -792],
  ["館", -1984],
  ["首", 1749],
  ["高", 2120],
  ["「", 1895],
  ["」", 3798],
  ["・", -4371],
  ["ッ", -724],
  ["ー", -11870],
  ["カ", 2145],
  ["コ", 1789],
  ["セ", 1287],
  ["ト", -403],
  ["メ", -1635],
  ["ラ", -881],
  ["リ", -541],
  ["ル", -856],
  ["ン", -3637]
]);

// UW5: weights keyed by the single character w5 in tokenize() (w5 can also be
// the end marker "E2", hence that key).
// Map requires an iterable of [key, value] entries.
// NOTE(review): the "]" key is reconstructed from a damaged source line
// (`" " , -2762 ],`, where the bracket inside the string literal was lost);
// confirm against the upstream TinySegmenter table.
// NOTE(review): the last six entries repeat keys listed above with the same
// values (harmless in a Map); presumably they were width variants in the
// upstream table — confirm against upstream.
var UW5 = new Map([
  [",", 465],
  [".", -299],
  ["1", -514],
  ["E2", -32768],
  ["]", -2762],
  ["、", 465],
  ["。", -299],
  ["「", 363],
  ["あ", 1655],
  ["い", 331],
  ["う", -503],
  ["え", 1199],
  ["お", 527],
  ["か", 647],
  ["が", -421],
  ["き", 1624],
  ["ぎ", 1971],
  ["く", 312],
  ["げ", -983],
  ["さ", -1537],
  ["し", -1371],
  ["す", -852],
  ["だ", -1186],
  ["ち", 1093],
  ["っ", 52],
  ["つ", 921],
  ["て", -18],
  ["で", -850],
  ["と", -127],
  ["ど", 1682],
  ["な", -787],
  ["に", -1224],
  ["の", -635],
  ["は", -578],
  ["べ", 1001],
  ["み", 502],
  ["め", 865],
  ["ゃ", 3350],
  ["ょ", 854],
  ["り", -208],
  ["る", 429],
  ["れ", 504],
  ["わ", 419],
  ["を", -1264],
  ["ん", 327],
  ["イ", 241],
  ["ル", 451],
  ["ン", -343],
  ["中", -871],
  ["京", 722],
  ["会", -1153],
  ["党", -654],
  ["務", 3519],
  ["区", -901],
  ["告", 848],
  ["員", 2104],
  ["大", -1296],
  ["学", -548],
  ["定", 1785],
  ["嵐", -1304],
  ["市", -2991],
  ["席", 921],
  ["年", 1763],
  ["思", 872],
  ["所", -814],
  ["挙", 1618],
  ["新", -1682],
  ["日", 218],
  ["月", -4353],
  ["査", 932],
  ["格", 1356],
  ["機", -1508],
  ["氏", -1347],
  ["田", 240],
  ["町", -3912],
  ["的", -3149],
  ["相", 1319],
  ["省", -1052],
  ["県", -4003],
  ["研", -997],
  ["社", -278],
  ["空", -813],
  ["統", 1955],
  ["者", -2233],
  ["表", 663],
  ["語", -1073],
  ["議", 1219],
  ["選", -1018],
  ["郎", -368],
  ["長", 786],
  ["間", 1191],
  ["題", 2368],
  ["館", -689],
  ["1", -514],
  ["E2", -32768],
  ["「", 363],
  ["イ", 241],
  ["ル", 451],
  ["ン", -343]
]);

// UW6: weights keyed by the single character w6 in tokenize() (w6 can also be
// the end marker "E1", hence that key).
// Map requires an iterable of [key, value] entries.
// NOTE(review): the last four entries repeat keys listed above with the same
// values (harmless in a Map); presumably they were width variants in the
// upstream table — confirm against upstream.
var UW6 = new Map([
  [",", 227],
  [".", 808],
  ["1", -270],
  ["E1", 306],
  ["、", 227],
  ["。", 808],
  ["あ", -307],
  ["う", 189],
  ["か", 241],
  ["が", -73],
  ["く", -121],
  ["こ", -200],
  ["じ", 1782],
  ["す", 383],
  ["た", -428],
  ["っ", 573],
  ["て", -1014],
  ["で", 101],
  ["と", -105],
  ["な", -253],
  ["に", -149],
  ["の", -417],
  ["は", -236],
  ["も", -206],
  ["り", 187],
  ["る", -135],
  ["を", 195],
  ["ル", -673],
  ["ン", -496],
  ["一", -277],
  ["中", 201],
  ["件", -800],
  ["会", 624],
  ["前", 302],
  ["区", 1792],
  ["員", -1212],
  ["委", 798],
  ["学", -960],
  ["市", 887],
  ["広", -695],
  ["後", 535],
  ["業", -697],
  ["相", 753],
  ["社", -507],
  ["福", 974],
  ["空", -822],
  ["者", 1811],
  ["連", 463],
  ["郎", 1082],
  ["1", -270],
  ["E1", 306],
  ["ル", -673],
  ["ン", -496]
]);

// CharMap: single character -> one-letter character class.
//   'H' for '一'..'龠' plus 々 〆 ヵ ヶ, 'I' for 'ぁ'..'ん', 'K' for 'ァ'..'ヴ',
//   'A' for a-z / A-Z, 'N' for 0-9, 'M' for the listed numeral kanji.
// Characters without an entry get 'O' at the lookup sites in tokenize().
// Later fills overwrite earlier ones, so the 'M' numerals must come after the
// 'H' range that also contains them.
// NOTE(review): the previous version repeated several of these ranges
// (a-z, A-Z, 0-9 twice each, and 'ァ'..'ン' inside 'ァ'..'ヴ'); the repeats
// had no effect on the map. Presumably they were width variants upstream —
// confirm against upstream.
var CharMap = (() => {
  const table = new Map();

  // Assign `cls` to every UTF-16 code unit from `first` to `last`, inclusive.
  const fillRange = (first, last, cls) => {
    const stop = last.charCodeAt();
    for (let code = first.charCodeAt(); code <= stop; code++) {
      table.set(String.fromCharCode(code), cls);
    }
  };

  // Assign `cls` to each code unit of `chars`.
  const fillEach = (chars, cls) => {
    for (const ch of chars.split('')) {
      table.set(ch, cls);
    }
  };

  fillRange('一', '龠', 'H');
  fillRange('ぁ', 'ん', 'I');
  fillRange('ァ', 'ヴ', 'K');
  fillRange('a', 'z', 'A');
  fillRange('A', 'Z', 'A');
  fillRange('0', '9', 'N');

  fillEach('一二三四五六七八九十百千万億兆', 'M');
  fillEach('々〆ヵヶ', 'H');

  return table;
})();

/**
 * Split text into word segments.
 *
 * Walks the string once. For each position between two UTF-16 code units it
 * starts from BIAS and adds the weights of every feature table, looked up
 * with a sliding window of six characters (w1..w6), their character classes
 * (c1..c6) and the three previous boundary decisions (p1..p3). A positive
 * score places a boundary between w3 and w4.
 *
 * `var` is kept throughout on purpose: the accompanying article reports
 * const/let being slower than var on Node.js v7.5.0.
 *
 * @param {string|String} text - Text to segment.
 * @returns {string[]} Segments in input order; `[]` for empty, null,
 *   undefined or non-string input.
 */
function tokenize(text) {
  // Unwrap String objects; `typeof` is the reliable test for primitives
  // (`instanceof String` is false for them).
  var str = ( text instanceof String ) ? text.valueOf() : text;

  if ( typeof str !== 'string' || str === '' ) {
    return [];
  }

  var result = [];
  var idx    = 0;
  var start  = idx;               // first index of the segment being built
  var last   = str.length - 1;

  // Previous three boundary decisions: 'U' unknown, 'B' boundary, 'O' none.
  var p1 = 'U'
    , p2 = 'U'
    , p3 = 'U'
    ;

  // Character window; B3/B2/B1 are begin-of-text markers.
  var w1 = 'B3'
    , w2 = 'B2'
    , w3 = 'B1'
    ;

  // Character classes for w1..w3; the markers count as 'O'.
  var c1 = 'O'
    , c2 = 'O'
    , c3 = 'O'
    ;

  var w4 = str[idx];
  var c4 = CharMap.get(w4) || 'O';

  var idx1 = idx + 1;
  var idx2 = idx + 2;
  var idx3 = null;

  var w5, w6, c5, c6;

  // Pre-fill w5/w6, padding with end-of-text markers E1/E2 on short input.
  if ( idx === last ) {
    w5 = 'E1';
    w6 = 'E2';
    c5 = 'O';
    c6 = 'O';
  } else {
    w5 = str[idx1];
    c5 = CharMap.get(w5) || 'O';

    if ( idx1 === last ) {
      w6 = 'E1';
      c6 = 'O';   // class of the marker; c5 keeps the real class of w5
    } else {
      w6 = str[idx2];
      c6 = CharMap.get(w6) || 'O';
    }
  }

  while ( idx < last ) {
    var score = BIAS;

    // Slide the window one position to the right.
    w1 = w2;
    w2 = w3;
    w3 = w4;
    w4 = w5;
    w5 = w6;

    c1 = c2;
    c2 = c3;
    c3 = c4;
    c4 = c5;
    c5 = c6;

    idx3 = idx + 3;

    if ( idx3 <= last ) {
      w6 = str[idx3];
      c6 = CharMap.get(w6) || 'O';
    } else if ( idx2 === last ) {
      w6 = 'E1';
      c6 = 'O';
    } else {
      w6 = 'E2';
      c6 = 'O';
    }

    // UP1, UP2 and UP3 written out as constants instead of Map lookups.
    if ( p1 === 'O' ) {
      score += -214;
    }

    if ( p2 === 'B' ) {
      score += 69;
    } else if ( p2 === 'O' ) {
      score += 935;
    }

    if ( p3 === 'B' ) {
      score += 189;
    }

    score += (BP1.get(p1 + p2) || 0);
    score += (BP2.get(p2 + p3) || 0);
    score += (UW1.get(w1) || 0);
    score += (UW2.get(w2) || 0);
    score += (UW3.get(w3) || 0);
    score += (UW4.get(w4) || 0);
    score += (UW5.get(w5) || 0);
    score += (UW6.get(w6) || 0);
    score += (BW1.get(w2 + w3) || 0);
    score += (BW2.get(w3 + w4) || 0);
    score += (BW3.get(w4 + w5) || 0);
    score += (TW1.get(w1 + w2 + w3) || 0);
    score += (TW2.get(w2 + w3 + w4) || 0);
    score += (TW3.get(w3 + w4 + w5) || 0);
    score += (TW4.get(w4 + w5 + w6) || 0);
    score += (UC1.get(c1) || 0);
    score += (UC2.get(c2) || 0);

    // UC3 written out as constants instead of a Map lookup.
    if ( c3 === 'A' ) {
      score += -1370;
    } else if ( c3 === 'I' ) {
      score += 2311;
    }

    score += (UC4.get(c4) || 0);
    score += (UC5.get(c5) || 0);
    score += (UC6.get(c6) || 0);
    score += (BC1.get(c2 + c3) || 0);
    score += (BC2.get(c3 + c4) || 0);
    score += (BC3.get(c4 + c5) || 0);
    score += (TC1.get(c1 + c2 + c3) || 0);
    score += (TC2.get(c2 + c3 + c4) || 0);
    score += (TC3.get(c3 + c4 + c5) || 0);
    score += (TC4.get(c4 + c5 + c6) || 0);
    score += (UQ1.get(p1 + c1) || 0);
    score += (UQ2.get(p2 + c2) || 0);
    score += (UQ3.get(p3 + c3) || 0);
    score += (BQ1.get(p2 + c2 + c3) || 0);
    score += (BQ2.get(p2 + c3 + c4) || 0);
    score += (BQ3.get(p3 + c2 + c3) || 0);
    score += (BQ4.get(p3 + c3 + c4) || 0);
    score += (TQ1.get(p2 + c1 + c2 + c3) || 0);
    score += (TQ2.get(p2 + c2 + c3 + c4) || 0);
    score += (TQ3.get(p3 + c1 + c2 + c3) || 0);
    score += (TQ4.get(p3 + c2 + c3 + c4) || 0);

    var p = 'O';

    if ( score > 0 ) {
      // Boundary after str[idx]: emit the finished segment.
      result[ result.length ] = str.substring(start, idx1);
      start = idx1;
      p     = 'B';
    }

    p1 = p2;
    p2 = p3;
    p3 = p;

    idx  = idx1;
    idx1 = idx2;
    idx2 = idx3;
  }

  // Whatever remains after the last boundary is the final segment
  // (idx1 equals str.length here).
  result[ result.length ] = str.substring(start, idx1);

  return result;
}

// CommonJS export: the module's value is the tokenize function itself.
module.exports = tokenize;

どれぐらい速くなったか

TinySegmenter.jl のリポジトリに有るベンチマークを適当に走らせてみた結果、 手元の、

という環境では、大体、ベンチマーク内にあるテキストの一回分の分かち書きの処理速度が、

オリジナル: 0.84sec ぐらい
逆移植版:   0.35sec ぐらい

となったので、大体 2.4 倍ぐらい速くなったっぽいです。

感想

うん、まあ今回は Julia 版をほぼ丸写しに近い形で移植したんですが、 僕がよく訳の分からんポカミスをやらかしていた影響で、昨日とかでもそうだったんですが、

あれ、なんか動いてない! なんでや……あ! 処理が抜けてる!

ってなって時間を消費していたので、まーその辺り、自分のポカミス具合いを改善出来たらなーとか思いました。

あと、今回書き写していた過程で、

Node.js v7.5.0 だと const とか let 使うと遅くなって var 使うと速い! なんでや!!1

という謎の知見を得られたりしたんですが、まあその辺りも面白かったですね。はい。


という事で話として以上です。 良かったら今回の TinySegmenter 、使ってみてください。 3-clause BSD なので無保証だけど!

nyarla が大体

Scrapbox でコメントや意見を書く