Skip to content

Commit

Permalink
custom UTF-8 (Uint8Array) to UTF-16 (String) converter that displays …
Browse files Browse the repository at this point in the history
…error info

fixes #4
  • Loading branch information
xBZZZZ committed Mar 23, 2024
1 parent b345a38 commit e83b87f
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 40 deletions.
2 changes: 1 addition & 1 deletion out.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -o errexit -o pipefail
export LC_ALL=C
rootfiles=(s js big_favicon.png favicon.ico css.css tiles.png help.png index.html index_multifile.xhtml)
jsfiles=(cc.js dict_editor.js structures.js obj_editor.js cselect.js guimgr.js main_gui.js console_helpers.js)
jsfiles=(cc.js utf8to16.js dict_editor.js structures.js obj_editor.js cselect.js guimgr.js main_gui.js console_helpers.js)

d=$(dirname -- "$0")
cd -- "$d"
Expand Down
6 changes: 4 additions & 2 deletions src/css.css
Original file line number Diff line number Diff line change
Expand Up @@ -620,12 +620,14 @@ hr{
.growc>*{
flex-grow:1;
}
a.thiccb{
padding:10px;
a.btn{
color:var(--Link);
text-decoration:underline;
white-space:pre-wrap;
}
a.thiccb{
padding:10px;
}
input.thiccb{
padding:10px;
text-align:left;
Expand Down
1 change: 1 addition & 0 deletions src/index_multifile.xhtml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<link rel='stylesheet' href='css.css'/>
<script src='js/inlineables.js'></script>
<script src='js/cc.js'></script>
<script src='js/utf8to16.js'></script>
<script src='js/dict_editor.js'></script>
<script src='js/structures.js'></script>
<script src='js/obj_editor.js'></script>
Expand Down
42 changes: 24 additions & 18 deletions src/js/cc.js
Original file line number Diff line number Diff line change
Expand Up @@ -262,13 +262,24 @@ function cc_load_gzip_file_reader_onload(){
ch=str.writable.getWriter();
ch.write(cc);
ch.close();
ch=str.readable.pipeThrough(new TextDecoderStream('utf-8',{'fatal':true})).getReader();
str='';
ch=str.readable.getReader();
cc=[];
ch.read().then(function ondata(d){
if(d.done)cc_load_xml_string(str);
else try{
str+=d.value;
ch.read().then(ondata,pos);
try{
if(d.done){
if(cc.length===1)buf=cc[0];
else{
var i=cc.length,j=0,buf;
while(i)j+=cc[--i].length;
buf=new Uint8Array(j);
i=cc.length;
while(i)buf.set(cc[--i],j-=cc[i].length);
}
utf8to16(cc_load_xml_string,loading_modal_abort,buf,'',0);
}else{
cc.push(d.value);
ch.read().then(ondata,pos);
}
}catch(error){
pos(error);
}
Expand All @@ -279,13 +290,11 @@ function cc_load_gzip_file_reader_onload(){
}

function cc_load_xml_file_reader_onload(){
try{
var s=decodeURIComponent(escape(this.result));
}catch(error){
say_error('bad UTF-8',error);
return;
}
cc_load_xml_string(s);
utf8to16(cc_load_xml_string,loading_modal_abort,new Uint8Array(this.result),'',0);
}

function loading_modal_abort(){
current_gui().error_tag.textContent='bad UTF-8 (aborted)';
}

function cc_load_gzip(file){
Expand Down Expand Up @@ -361,10 +370,7 @@ function cc_load_aes_file_reader_onload(){
if(!decryptedbuf)throw Error('padding = 0');
var m=i---decryptedbuf;
while(i>m)if(decryptedbuf!==decrypted[--i])throw Error('padding bytes not all equal');
cc_load_xml_string(new TextDecoder('utf-8',{
'fatal':true,
'ignoreBOM':false
}).decode(decrypted.subarray(0,m)));
utf8to16(cc_load_xml_string,loading_modal_abort,decrypted.subarray(0,m),'',0);
}).catch(say_error.bind(null,'AES-ECB 256 decrypt'));
}catch(error){
say_error('AES-ECB 256 decrypt',error);
Expand Down Expand Up @@ -443,7 +449,7 @@ function cc_load_xml(file){
var fr=new FileReader();
fr.addEventListener('error',file_reader_onerror,onceel);
fr.addEventListener('load',cc_load_xml_file_reader_onload,onceel);
fr.readAsBinaryString(file);
fr.readAsArrayBuffer(file);
}catch(error){
say_error('FileReader',error);
}
Expand Down
24 changes: 14 additions & 10 deletions src/js/dict_editor.js
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,10 @@ function xml_ie_back(){

function xml_ie_show_error(e){
console.error(e);
var li=cre('li'),g=current_gui();
var li=cre('li');
li.style.backgroundColor='var(--Bad)';
li.textContent=e;
g.status.appendChild(li);
current_gui().status.appendChild(li);
set_loading(false);
}

Expand All @@ -228,13 +228,10 @@ function xml_ie_filereader_onerror(){
}

function xml_ie_filereader_onload(){
try{
var g=decodeURIComponent(escape(this.result));
}catch(error){
xml_ie_show_error('bad UTF-8');
console.error('original error:',error);
return;
}
utf8to16(xml_ie_utf8to16_callback,xml_ie_utf8to16_abort,new Uint8Array(this.result),'',0);
}

function xml_ie_utf8to16_callback(g){
try{
var d=document.implementation.createDocument(null,'d',null).documentElement;
d.innerHTML=g;
Expand All @@ -250,6 +247,13 @@ function xml_ie_filereader_onload(){
}
}

function xml_ie_utf8to16_abort(){
var li=cre('li');
li.style.backgroundColor='var(--Bad)';
li.textContent='bad UTF-8 (aborted)';
current_gui().status.appendChild(li);
}

function xml_ie_import(){
var g=current_gui(),f=g.last_blob_url;
if(f){
Expand All @@ -270,7 +274,7 @@ function xml_ie_import(){
g=new FileReader();
g.addEventListener('error',xml_ie_filereader_onerror,onceel);
g.addEventListener('load',xml_ie_filereader_onload,onceel);
g.readAsBinaryString(f);
g.readAsArrayBuffer(f);
}catch(error){
xml_ie_show_error(error);
}
Expand Down
21 changes: 12 additions & 9 deletions src/js/guimgr.js
Original file line number Diff line number Diff line change
Expand Up @@ -449,15 +449,13 @@ function AdvFileLoader(raw,out){
AdvFileLoader.prototype.handleEvent=function(e){
switch(e.type){
case 'load':
if(this.raw)e=e.target.result;
else try{
e=decodeURIComponent(escape(e.target.result));
}catch(error){
say_error('bad UTF-8',error);
if(this.raw){
this.out.setvalchg(e.target.result);
set_loading(false);
return;
}
this.out.setvalchg(e);
set_loading(false);
var r=this.out;
utf8to16(AdvFileLoader.utf8to16_callback.bind(this.out),Function.prototype,new Uint8Array(e.target.result),'',0);
return;
case 'error':
say_error('FileReader',e.target.error);
Expand All @@ -466,13 +464,18 @@ AdvFileLoader.prototype.handleEvent=function(e){
if(e=e.target.files[0]){
set_loading(true);
try{
var r=new FileReader();
r=new FileReader();
r.addEventListener('load',this,onceel);
r.addEventListener('error',this,onceel);
r.readAsBinaryString(e);
r[this.raw?'readAsBinaryString':'readAsArrayBuffer'](e);
}catch(error){
say_error('FileReader',error);
}
}
}
};

AdvFileLoader.utf8to16_callback=function(str){
this.setvalchg(str);
set_loading(false);
};
86 changes: 86 additions & 0 deletions src/js/utf8to16.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
'use strict';

//https://encoding.spec.whatwg.org/#utf-8-decoder

//Decode the UTF-8 bytes in buf (indexable bytes, e.g. Uint8Array), starting at index i,
//and append the resulting UTF-16 code units to outstr.
//
//callback_ok(str)  - called with the full string once the end of buf is reached
//callback_abort    - falsy: every malformed sequence is replaced with U+FFFD and decoding goes on
//                    truthy: decoding stops at the first malformed sequence and a dialog is
//                    shown offering "abort" (calls callback_abort()), "replace errors"
//                    (restarts in replace mode from the error) and "save data"
//outstr,i          - text decoded so far / byte offset to resume from (callers pass '' and 0)
//
//NOTE(review): cre, push_gui, pop_gui, set_loading, capel and binblobopts are not defined in
//this file; they are assumed to be globals provided by the other scripts of the page.
function utf8to16(callback_ok,callback_abort,buf,outstr,i){
	var total=buf.length,toStr=String.fromCharCode;
	var start,lead,c2,c3,c4,stop,lo,hi,failed;
	for(;;){
		failed=false;
		while(i!==total){
			//start = offset of the first byte of the code point being decoded
			start=i;
			lead=buf[i];
			if(lead<128){
				//run of ASCII bytes, converted in chunks of at most 8192 bytes
				stop=total-start>8192?start+8192:total;
				do{
					++i;
				}while(i!==stop&&buf[i]<128);
				outstr+=toStr.apply(null,buf.subarray(start,i));
				continue;
			}
			//assume failure until the whole sequence has been validated;
			//on failure i stays on the offending byte (or on total if data ended early)
			failed=true;
			if(lead<194||lead>244)break;
			if(++i===total)break;
			c2=buf[i];
			//lead is valid start of multi-byte code point
			if(lead<224){
				//110xxxxx (2 byte code point)
				if(c2<128||c2>191)break;
				outstr+=toStr(c2^(lead<<6)^12416);
			}else if(lead<240){
				//1110xxxx (3 byte code point)
				//224 needs c2>=160 (no overlong forms), 237 needs c2<=159 (no surrogates)
				lo=lead===224?160:128;
				hi=lead===237?159:191;
				if(c2<lo||c2>hi)break;
				if(++i===total)break;
				c3=buf[i];
				if(c3<128||c3>191)break;
				outstr+=toStr(c3^(c2<<6)^(lead<<12)^925824);
			}else{
				//11110xxx (4 byte code point)
				//240 needs c2>=144 (no overlong forms), 244 needs c2<=143 (max U+10FFFF)
				lo=lead===240?144:128;
				hi=lead===244?143:191;
				if(c2<lo||c2>hi)break;
				if(++i===total)break;
				c3=buf[i];
				if(c3<128||c3>191)break;
				if(++i===total)break;
				c4=buf[i];
				if(c4<128||c4>191)break;
				//emitted as a surrogate pair (high, low)
				outstr+=toStr(
					(c3>>4)+(c2<<2)+(lead<<8)-6728,
					c4^((c3&15)<<6)^56448
				);
			}
			failed=false;
			++i;
		}
		if(!failed){
			callback_ok(outstr);
			return;
		}
		//malformed sequence: leave the loop to ask the user unless in replace mode
		if(callback_abort)break;
		//replace mode: skip a bad lead byte; a bad continuation byte is decoded again as a new start
		if(i===start)++i;
		outstr+='\uFFFD';
	}

	//ask how to handle error
	//rows are appended one by one instead of setting innerHTML on the table
	//because browser adds <tbody> in html mode (non-xhtml)
	var table=cre('table'),row,k;
	var infoRows=[
		'<th colspan="2" style="background-color:var(--Bad);">bad UTF-8</th>',
		'<td>code point offset inside data:</td><td>'+start+'</td>',
		'<td>error offset inside code point:</td><td>'+(i-start)+'</td>',
		'<td>data length:</td><td>'+total+'</td>'
	];
	table.className='tableborder';
	for(k=0;k<infoRows.length;++k){
		row=cre('tr');
		row.innerHTML=infoRows[k];
		table.appendChild(row);
	}
	row=cre('tr');
	row.innerHTML='<td colspan="2"><input value="abort" data-a="a" type="button"/><input value="replace errors" title="replace errors with U+FFFD like str(data,&apos;utf-8&apos;,&apos;replace&apos;) in python3" data-a="r" type="button"/><a data-a="s" download="bad_utf8.bin" href="javascript:;" class="btn" style="display:inline-block;">save data</a></td>';
	//one listener on the cell handles all three controls, told apart by their data-a attribute
	row.firstChild.addEventListener('click',function(ev){
		var target=ev.target,action=target.dataset.a;
		if(action==='a'){
			pop_gui();
			callback_abort();
			set_loading(false);
		}else if(action==='r'){
			set_loading(true);
			pop_gui();
			//continue after the error in replace mode (callback_abort = null)
			utf8to16(callback_ok,null,buf,outstr+'\uFFFD',i+(i===start));
		}else if(action==='s'){
			//give the link a temporary blob url for this click, released right afterwards
			setTimeout(revoke_href,0,target);
			target.href=URL.createObjectURL(new Blob([buf],binblobopts));
		}
	},capel);
	table.appendChild(row);
	push_gui(table,true);
	set_loading(false);
}

//Release the object URL currently stored in a.href and
//put the inert 'javascript:;' placeholder back on the link.
function revoke_href(a){
	var objectUrl=a.href;
	URL.revokeObjectURL(objectUrl);
	a.href='javascript:;';
}

0 comments on commit e83b87f

Please sign in to comment.