diff --git a/out.sh b/out.sh index 8c723a7..e2ac63b 100755 --- a/out.sh +++ b/out.sh @@ -2,7 +2,7 @@ set -o errexit -o pipefail export LC_ALL=C rootfiles=(s js big_favicon.png favicon.ico css.css tiles.png help.png index.html index_multifile.xhtml) -jsfiles=(cc.js dict_editor.js structures.js obj_editor.js cselect.js guimgr.js main_gui.js console_helpers.js) +jsfiles=(cc.js utf8to16.js dict_editor.js structures.js obj_editor.js cselect.js guimgr.js main_gui.js console_helpers.js) d=$(dirname -- "$0") cd -- "$d" diff --git a/src/css.css b/src/css.css index ef4e1ba..f489790 100644 --- a/src/css.css +++ b/src/css.css @@ -620,12 +620,14 @@ hr{ .growc>*{ flex-grow:1; } -a.thiccb{ - padding:10px; +a.btn{ color:var(--Link); text-decoration:underline; white-space:pre-wrap; } +a.thiccb{ + padding:10px; +} input.thiccb{ padding:10px; text-align:left; diff --git a/src/index_multifile.xhtml b/src/index_multifile.xhtml index 688c778..d9b035a 100644 --- a/src/index_multifile.xhtml +++ b/src/index_multifile.xhtml @@ -17,6 +17,7 @@ + diff --git a/src/js/cc.js b/src/js/cc.js index b1f7969..f221d47 100644 --- a/src/js/cc.js +++ b/src/js/cc.js @@ -262,13 +262,24 @@ function cc_load_gzip_file_reader_onload(){ ch=str.writable.getWriter(); ch.write(cc); ch.close(); - ch=str.readable.pipeThrough(new TextDecoderStream('utf-8',{'fatal':true})).getReader(); - str=''; + ch=str.readable.getReader(); + cc=[]; ch.read().then(function ondata(d){ - if(d.done)cc_load_xml_string(str); - else try{ - str+=d.value; - ch.read().then(ondata,pos); + try{ + if(d.done){ + if(cc.length===1)buf=cc[0]; + else{ + var i=cc.length,j=0,buf; + while(i)j+=cc[--i].length; + buf=new Uint8Array(j); + i=cc.length; + while(i)buf.set(cc[--i],j-=cc[i].length); + } + utf8to16(cc_load_xml_string,loading_modal_abort,buf,'',0); + }else{ + cc.push(d.value); + ch.read().then(ondata,pos); + } }catch(error){ pos(error); } @@ -279,13 +290,11 @@ function cc_load_gzip_file_reader_onload(){ } function cc_load_xml_file_reader_onload(){ - try{ - var s=decodeURIComponent(escape(this.result)); - }catch(error){ - say_error('bad UTF-8',error); - return; - } - cc_load_xml_string(s); + utf8to16(cc_load_xml_string,loading_modal_abort,new Uint8Array(this.result),'',0); +} + +function loading_modal_abort(){ + current_gui().error_tag.textContent='bad UTF-8 (aborted)'; } function cc_load_gzip(file){ @@ -361,10 +370,7 @@ function cc_load_aes_file_reader_onload(){ if(!decryptedbuf)throw Error('padding = 0'); var m=i---decryptedbuf; while(i>m)if(decryptedbuf!==decrypted[--i])throw Error('padding bytes not all equal'); - cc_load_xml_string(new TextDecoder('utf-8',{ - 'fatal':true, - 'ignoreBOM':false - }).decode(decrypted.subarray(0,m))); + utf8to16(cc_load_xml_string,loading_modal_abort,decrypted.subarray(0,m),'',0); }).catch(say_error.bind(null,'AES-ECB 256 decrypt')); }catch(error){ say_error('AES-ECB 256 decrypt',error); @@ -443,7 +449,7 @@ function cc_load_xml(file){ var fr=new FileReader(); fr.addEventListener('error',file_reader_onerror,onceel); fr.addEventListener('load',cc_load_xml_file_reader_onload,onceel); - fr.readAsBinaryString(file); + fr.readAsArrayBuffer(file); }catch(error){ say_error('FileReader',error); } diff --git a/src/js/dict_editor.js b/src/js/dict_editor.js index 1c2138b..4efa0d4 100644 --- a/src/js/dict_editor.js +++ b/src/js/dict_editor.js @@ -216,10 +216,10 @@ function xml_ie_back(){ function xml_ie_show_error(e){ console.error(e); - var li=cre('li'),g=current_gui(); + var li=cre('li'); li.style.backgroundColor='var(--Bad)'; li.textContent=e; - g.status.appendChild(li); + current_gui().status.appendChild(li); set_loading(false); } @@ -228,13 +228,10 @@ function xml_ie_filereader_onerror(){ } function xml_ie_filereader_onload(){ - try{ - var g=decodeURIComponent(escape(this.result)); - }catch(error){ - xml_ie_show_error('bad UTF-8'); - console.error('original error:',error); - return; - } + utf8to16(xml_ie_utf8to16_callback,xml_ie_utf8to16_abort,new Uint8Array(this.result),'',0); +} + +function xml_ie_utf8to16_callback(g){ try{ var d=document.implementation.createDocument(null,'d',null).documentElement; d.innerHTML=g; @@ -250,6 +247,13 @@ function xml_ie_filereader_onload(){ } } +function xml_ie_utf8to16_abort(){ + var li=cre('li'); + li.style.backgroundColor='var(--Bad)'; + li.textContent='bad UTF-8 (aborted)'; + current_gui().status.appendChild(li); +} + function xml_ie_import(){ var g=current_gui(),f=g.last_blob_url; if(f){ @@ -270,7 +274,7 @@ function xml_ie_import(){ g=new FileReader(); g.addEventListener('error',xml_ie_filereader_onerror,onceel); g.addEventListener('load',xml_ie_filereader_onload,onceel); - g.readAsBinaryString(f); + g.readAsArrayBuffer(f); }catch(error){ xml_ie_show_error(error); } diff --git a/src/js/guimgr.js b/src/js/guimgr.js index 81c88ed..dc93d93 100644 --- a/src/js/guimgr.js +++ b/src/js/guimgr.js @@ -449,15 +449,13 @@ function AdvFileLoader(raw,out){ AdvFileLoader.prototype.handleEvent=function(e){ switch(e.type){ case 'load': - if(this.raw)e=e.target.result; - else try{ - e=decodeURIComponent(escape(e.target.result)); - }catch(error){ - say_error('bad UTF-8',error); + if(this.raw){ + this.out.setvalchg(e.target.result); + set_loading(false); return; } - this.out.setvalchg(e); - set_loading(false); + var r=this.out; + utf8to16(AdvFileLoader.utf8to16_callback.bind(this.out),Function.prototype,new Uint8Array(e.target.result),'',0); return; case 'error': say_error('FileReader',e.target.error); @@ -466,13 +464,18 @@ AdvFileLoader.prototype.handleEvent=function(e){ if(e=e.target.files[0]){ set_loading(true); try{ - var r=new FileReader(); + r=new FileReader(); r.addEventListener('load',this,onceel); r.addEventListener('error',this,onceel); - r.readAsBinaryString(e); + r[this.raw?'readAsBinaryString':'readAsArrayBuffer'](e); }catch(error){ say_error('FileReader',error); } } } +}; + +AdvFileLoader.utf8to16_callback=function(str){ + this.setvalchg(str); + set_loading(false); }; \ No newline at end of file diff --git a/src/js/utf8to16.js b/src/js/utf8to16.js new file mode 100644 index 0000000..98200ee --- /dev/null +++ b/src/js/utf8to16.js @@ -0,0 +1,86 @@ +'use strict'; + +//https://encoding.spec.whatwg.org/#utf-8-decoder + +function utf8to16(callback_ok,callback_abort,buf,outstr,i){ + var l=buf.length,chr=String.fromCharCode,b1,b2,b3,b4,j; + oof:do{ + for(;i!==l;++i){ + if(buf[j=i]<128){ + if((b1=l-i)>8192)b1=8192; + while(++i,--b1&&buf[i]<128); + outstr+=chr.apply(null,buf.subarray(j,i--)); + continue; + } + if((b1=buf[i])<194||b1>244 + ||++i===l)continue oof; + b2=buf[i]; + //b1 is valid start of multi-byte code point + if(b1<224){ + //110xxxxx (2 byte code point) + if(b2<128||b2>191)continue oof; + outstr+=chr(b2^(b1<<6)^12416); + continue; + } + if(b1<240){ + //1110xxxx (3 byte code point) + if(b2<(b1===224?160:128)||b2>(b1===237?159:191) + ||++i===l + ||(b3=buf[i])<128||b3>191)continue oof; + outstr+=chr(b3^(b2<<6)^(b1<<12)^925824); + continue; + } + //11110xxx (4 byte code point) + if(b2<(b1===240?144:128)||b2>(b1===244?143:191) + ||++i===l + ||(b3=buf[i])<128||b3>191 + ||++i===l + ||(b4=buf[i])<128||b4>191)continue oof; + outstr+=chr( + (b3>>4)+(b2<<2)+(b1<<8)-6728, + b4^((b3&15)<<6)^56448 + ); + } + callback_ok(outstr); + return; + }while(!callback_abort&&(i+=i===j,outstr+='\uFFFD')); + + //ask how to handle error + //don't set tbl.innerHTML because browser adds in html mode (non-xhtml) + var tbl=cre('table'),tr=cre('tr'); + tbl.className='tableborder'; + tr.innerHTML='bad UTF-8'; + tbl.appendChild(tr); + (tr=cre('tr')).innerHTML='code point offset inside data:'+j+''; + tbl.appendChild(tr); + (tr=cre('tr')).innerHTML='error offset inside code point:'+(i-j)+''; + tbl.appendChild(tr); + (tr=cre('tr')).innerHTML='data length:'+l+''; + tbl.appendChild(tr); + (tr=cre('tr')).innerHTML='save data'; + tr.firstChild.addEventListener('click',function(e){ + switch((e=e.target).dataset.a){ + case 'a': + pop_gui(); + callback_abort(); + set_loading(false); + return; + case 'r': + set_loading(true); + pop_gui(); + utf8to16(callback_ok,null,buf,outstr+'\uFFFD',i+(i===j)); + return; + case 's': + setTimeout(revoke_href,0,e); + e.href=URL.createObjectURL(new Blob([buf],binblobopts)); + } + },capel); + tbl.appendChild(tr); + push_gui(tbl,true); + set_loading(false); +} + +function revoke_href(a){ + URL.revokeObjectURL(a.href); + a.href='javascript:;'; +} \ No newline at end of file