Skip to content

Commit

Permalink
Implement KANJI mode properly (UTF-8 only)
Browse files Browse the repository at this point in the history
  • Loading branch information
kikuchan committed Jan 11, 2024
1 parent b446216 commit ee9969f
Show file tree
Hide file tree
Showing 10 changed files with 3,388 additions and 8,023 deletions.
15 changes: 14 additions & 1 deletion cli/qrean/qrean.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ int usage(FILE *out)
fprintf(out, " L, M, Q, H (for QR)\n");
fprintf(out, " L, M, Q (for mQR)\n");
fprintf(out, " M, H (for rMQR)\n");
fprintf(out, " -N Use Numeric mode only\n");
fprintf(out, " -A Use Alpha numeric mode only\n");
fprintf(out, " -K Use Kanji mode only\n");
fprintf(out, " -8 Use 8bit mode only\n");

return 1;
}
Expand Down Expand Up @@ -137,7 +141,7 @@ int main(int argc, char *argv[])
padding_t padding = create_padding1(4);
qrean_data_type_t data_type = QREAN_DATA_TYPE_AUTO;

while ((ch = getopt(argc, argv, "hi:o:s:f:t:v:l:m:p:8")) != -1) {
while ((ch = getopt(argc, argv, "hi:o:s:f:t:v:l:m:p:8KAN")) != -1) {
int n;
switch (ch) {
case 'h':
Expand Down Expand Up @@ -257,6 +261,15 @@ int main(int argc, char *argv[])
case '8':
data_type = QREAN_DATA_TYPE_8BIT;
break;
case 'K':
data_type = QREAN_DATA_TYPE_KANJI;
break;
case 'A':
data_type = QREAN_DATA_TYPE_ALNUM;
break;
case 'N':
data_type = QREAN_DATA_TYPE_NUMERIC;
break;
}
}

Expand Down
2 changes: 1 addition & 1 deletion dist/Qrean.js

Large diffs are not rendered by default.

Binary file modified dist/qrean-detect.exe
Binary file not shown.
Binary file modified dist/qrean.exe
Binary file not shown.
6 changes: 4 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ CFLAGS += -Wno-misleading-indentation
#CFLAGS += -D NO_CALLBACK
#CFLAGS += -D NO_CANVAS_BUFFER

#CFLAGS += -D NO_KANJI

#CFLAGS += -D NO_MQR
#CFLAGS += -D NO_RMQR

Expand All @@ -64,12 +66,12 @@ TARGET = $(BUILDDIR)/libqrean.a

all: $(TARGET)

$(TARGET): $(OBJS)
$(TARGET): $(OBJS) Makefile
$(AR) rcs $@ $^

-include $(DEPS)

$(BUILDDIR)/%.o: %.c
$(BUILDDIR)/%.o: %.c Makefile
@mkdir -p $(BUILDDIR)
$(CC) $(CFLAGS) -c -MMD -MP $< -o $@

Expand Down
151 changes: 110 additions & 41 deletions src/qrdata.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#include <string.h>

#include "qrdata.h"
#include "bitstream.h"
#include "debug.h"
#include "qrdata.h"
#include "qrkanji.h"
#include "qrspec.h"
#include "utils.h"
#include "qrkanji.h"

qrdata_t create_qrdata_for(bitstream_t bs, qr_version_t version)
{
Expand Down Expand Up @@ -45,10 +45,31 @@ static size_t measure_alnum(const char *src)
return strspn(src, alnum);
}

/*
 * Count the run of Kanji-mode characters that starts at `src`.
 *
 * Walks the NUL-terminated string, using the return value of
 * qrkanji_from_utf8() as the byte width of the next character, and stops at
 * the first position where that call reports failure (negative) or makes no
 * progress (zero).
 *
 * byte_consumed: optional out-parameter; when non-NULL it receives the number
 *                of bytes covered by the counted characters.
 * Returns the number of characters counted (0 if `src` does not start with one).
 */
static size_t measure_kanji(const char *src, size_t *byte_consumed)
{
	const char *ptr = src;
	size_t len = 0;

	while (*ptr) {
		int consumed = qrkanji_from_utf8(ptr, NULL);
		// Treat 0 like an error: a zero-width result would never advance
		// ptr and the loop would spin forever.
		if (consumed <= 0) break;
		ptr += consumed;
		len++;
	}
	if (byte_consumed) *byte_consumed = (size_t)(ptr - src);
	return len;
}

#define LEN_CMP_8BIT(ch) (strcspn((ch), alnum))
#define LEN_CMP_ALNUM(ch) (strspn((ch), alnum_cmp))
#define LEN_NUMERIC(ch) (measure_numeric(ch))
#define LEN_ALNUM(ch) (measure_alnum(ch))
#define LEN_KANJI(ch, bytelen) (measure_kanji((ch), (bytelen)))

#define MQR_DATA_MODE_NUMERIC (0)
#define MQR_DATA_MODE_ALNUM (1)
#define MQR_DATA_MODE_8BIT (2)
#define MQR_DATA_MODE_KANJI (3)

#define RMQR_DATA_MODE_END (0)
#define RMQR_DATA_MODE_NUMERIC (1)
Expand All @@ -63,7 +84,7 @@ size_t qrdata_write_numeric_string(qrdata_t *data, const char *src, size_t len)
}

if (IS_MQR(data->version)) {
bitstream_write_bits(&data->bs, 0, data->version - QR_VERSION_M1);
bitstream_write_bits(&data->bs, MQR_DATA_MODE_NUMERIC, data->version - QR_VERSION_M1);
} else if (IS_RMQR(data->version)) {
bitstream_write_bits(&data->bs, RMQR_DATA_MODE_NUMERIC, 3);
} else if (IS_QR(data->version)) {
Expand Down Expand Up @@ -93,7 +114,7 @@ size_t qrdata_write_alnum_string(qrdata_t *data, const char *src, size_t len)

if (IS_MQR(data->version)) {
if (data->version == QR_VERSION_M1) return 0;
bitstream_write_bits(&data->bs, 1, data->version - QR_VERSION_M1);
bitstream_write_bits(&data->bs, MQR_DATA_MODE_ALNUM, data->version - QR_VERSION_M1);
} else if (IS_RMQR(data->version)) {
bitstream_write_bits(&data->bs, RMQR_DATA_MODE_ALNUM, 3);
} else if (IS_QR(data->version)) {
Expand Down Expand Up @@ -126,7 +147,7 @@ size_t qrdata_write_8bit_string(qrdata_t *data, const char *src, size_t len)

if (IS_MQR(data->version)) {
if (data->version <= QR_VERSION_M2) return 0;
bitstream_write_bits(&data->bs, 2, data->version - QR_VERSION_M1);
bitstream_write_bits(&data->bs, MQR_DATA_MODE_8BIT, data->version - QR_VERSION_M1);
} else if (IS_RMQR(data->version)) {
bitstream_write_bits(&data->bs, RMQR_DATA_MODE_8BIT, 3);
} else if (IS_QR(data->version)) {
Expand All @@ -145,6 +166,39 @@ size_t qrdata_write_8bit_string(qrdata_t *data, const char *src, size_t len)
return i;
}

/*
 * Append `src[0..srclen)` to `data` as one Kanji-mode segment.
 *
 * The segment is: mode indicator, character count, then one 13-bit code per
 * character as produced by qrkanji_from_utf8().
 *
 * The whole `srclen`-byte range must decode as Kanji-mode characters and must
 * end on a character boundary; otherwise nothing is written and 0 is returned.
 * 0 is also returned for Micro QR versions up to M2 and for unrecognised
 * symbol versions.
 *
 * Returns the number of source bytes consumed. This can be less than `srclen`
 * if the bitstream reports its end while characters are being written.
 */
size_t qrdata_write_kanji_string(qrdata_t *data, const char *src, size_t srclen)
{
	// Validate and count strictly within srclen. Counting up to the NUL
	// terminator instead would make the length indicator disagree with the
	// number of codes actually emitted when srclen is shorter than the run.
	size_t kanjilen = 0;
	size_t pos = 0;
	while (pos < srclen) {
		int consumed = qrkanji_from_utf8(src + pos, NULL);
		// Not a Kanji-mode character, or a decoder that made no progress.
		if (consumed <= 0) return 0;
		// Character would straddle the end of the requested range.
		if ((size_t)consumed > srclen - pos) return 0;
		pos += (size_t)consumed;
		kanjilen++;
	}

	if (IS_MQR(data->version)) {
		if (data->version <= QR_VERSION_M2) return 0;
		bitstream_write_bits(&data->bs, MQR_DATA_MODE_KANJI, data->version - QR_VERSION_M1);
	} else if (IS_RMQR(data->version)) {
		bitstream_write_bits(&data->bs, RMQR_DATA_MODE_KANJI, 3);
	} else if (IS_QR(data->version)) {
		bitstream_write_bits(&data->bs, QR_DATA_MODE_KANJI, 4);
	} else {
		// unsupported
		return 0;
	}

	// NOTE(review): kanjilen is not checked against the width of the length
	// field; presumably the caller picks a version large enough -- confirm.
	bitstream_write_bits(&data->bs, kanjilen, LENGTH_BIT_SIZE_FOR_KANJI(data->version));

	size_t i = 0;
	while (i < srclen && !bitstream_is_end(&data->bs)) {
		uint16_t code;
		int consumed = qrkanji_from_utf8(src + i, &code);
		// Cannot normally happen after the validation pass above; kept as a
		// guard so the loop always terminates.
		if (consumed <= 0) break;

		bitstream_write_bits(&data->bs, code, 13);

		i += (size_t)consumed;
	}
	return i;
}

bit_t qrdata_finalize(qrdata_t *data)
{
if (IS_MQR(data->version)) {
Expand Down Expand Up @@ -184,6 +238,9 @@ static size_t qrdata_flush(qrdata_t *data, qr_data_mode_t mode, const char *src,
case QR_DATA_MODE_8BIT:
return qrdata_write_8bit_string(data, src, len);

case QR_DATA_MODE_KANJI:
return qrdata_write_kanji_string(data, src, len);

default:
return 0;
}
Expand All @@ -197,7 +254,9 @@ size_t qrdata_write_string(qrdata_t *data, const char *src, size_t len)
qr_data_mode_t mode;

// initial mode
if (LEN_CMP_8BIT(src) > 0) {
if (LEN_KANJI(src, NULL) > 0) {
mode = QR_DATA_MODE_KANJI;
} else if (LEN_CMP_8BIT(src) > 0) {
mode = QR_DATA_MODE_8BIT;
} else if ((l = LEN_CMP_ALNUM(src)) > 0) {
if (l < VERDEPNUM(v, 6, 7, 8) && l < len) {
Expand All @@ -219,27 +278,35 @@ size_t qrdata_write_string(qrdata_t *data, const char *src, size_t len)
size_t last_i = 0;
qr_data_mode_t last_mode = mode;

for (i = 0; i < len; i++) {
if (mode == QR_DATA_MODE_8BIT) {
if (LEN_NUMERIC(src + i) >= VERDEPNUM(v, 6, 8, 9)) {
mode = QR_DATA_MODE_NUMERIC;
} else if (LEN_ALNUM(src + i) >= VERDEPNUM(v, 11, 15, 16)) {
mode = QR_DATA_MODE_ALNUM;
}
} else if (mode == QR_DATA_MODE_ALNUM) {
if (LEN_CMP_8BIT(src + i) > 0) {
mode = QR_DATA_MODE_8BIT;
} else if (LEN_NUMERIC(src + i) >= VERDEPNUM(v, 13, 15, 17)) {
mode = QR_DATA_MODE_NUMERIC;
}
} else if (mode == QR_DATA_MODE_NUMERIC) {
if (LEN_CMP_8BIT(src + i) > 0) {
i = 0;
while (i < len) {
size_t bytelen;
if (LEN_KANJI(src + i, &bytelen) > 0) {
mode = QR_DATA_MODE_KANJI;
} else {
bytelen = 1;
if (mode == QR_DATA_MODE_KANJI || mode == QR_DATA_MODE_8BIT) {
mode = QR_DATA_MODE_8BIT;
} else if (LEN_CMP_ALNUM(src + i) > 0) {
mode = QR_DATA_MODE_ALNUM;
if (LEN_NUMERIC(src + i) >= VERDEPNUM(v, 6, 8, 9)) {
mode = QR_DATA_MODE_NUMERIC;
} else if (LEN_ALNUM(src + i) >= VERDEPNUM(v, 11, 15, 16)) {
mode = QR_DATA_MODE_ALNUM;
}
} else if (mode == QR_DATA_MODE_ALNUM) {
if (LEN_CMP_8BIT(src + i) > 0) {
mode = QR_DATA_MODE_8BIT;
} else if (LEN_NUMERIC(src + i) >= VERDEPNUM(v, 13, 15, 17)) {
mode = QR_DATA_MODE_NUMERIC;
}
} else if (mode == QR_DATA_MODE_NUMERIC) {
if (LEN_CMP_8BIT(src + i) > 0) {
mode = QR_DATA_MODE_8BIT;
} else if (LEN_CMP_ALNUM(src + i) > 0) {
mode = QR_DATA_MODE_ALNUM;
}
} else {
return 0;
}
} else {
return 0;
}

if (mode != last_mode) {
Expand All @@ -248,6 +315,8 @@ size_t qrdata_write_string(qrdata_t *data, const char *src, size_t len)
last_mode = mode;
last_i = i;
}

i += bytelen;
}

size_t r = last_i + qrdata_flush(data, last_mode, src + last_i, len - last_i);
Expand Down Expand Up @@ -364,6 +433,22 @@ size_t qrdata_parse(qrdata_t *data, void (*on_letter_cb)(qr_data_mode_t mode, co
}
break;

case QR_DATA_MODE_KANJI:
len = bitstream_read_bits(r, LENGTH_BIT_SIZE_FOR_KANJI(data->version));
if (len == 0) goto mode_end;
while (len-- > 0) {
char buf[4] = {};

uint16_t uni = qrkanji_to_utf8(bitstream_read_bits(r, 13), buf);
if (!uni) continue;

for (char *p = buf; *p; p++) {
on_letter_cb(mode, *p, opaque);
wrote++;
}
}
break;

case QR_DATA_MODE_ECI:;
{
uint_fast32_t eci;
Expand All @@ -386,22 +471,6 @@ size_t qrdata_parse(qrdata_t *data, void (*on_letter_cb)(qr_data_mode_t mode, co
// TODO:
}
break;
case QR_DATA_MODE_KANJI:
len = bitstream_read_bits(r, LENGTH_BIT_SIZE_FOR_KANJI(data->version));
if (len == 0) goto mode_end;
while (len-- > 0) {
uint16_t uni = qrkanji_to_unicode(bitstream_read_bits(r, 13));
if (!uni) continue;

char buf[4] = {};
unicode_to_utf8(uni, buf);

for (char *p = buf; *p; p++) {
on_letter_cb(mode, *p, opaque);
wrote++;
}
}
break;

case QR_DATA_MODE_STRUCTURED: {
int a = bitstream_read_bits(r, 8);
Expand Down
1 change: 1 addition & 0 deletions src/qrdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ void qrdata_free(qrdata_t *data);
typedef size_t (*qrdata_writer_t)(qrdata_t *data, const char *src, size_t len);

size_t qrdata_write_8bit_string(qrdata_t *data, const char *src, size_t len);
size_t qrdata_write_kanji_string(qrdata_t *data, const char *src, size_t len);
size_t qrdata_write_numeric_string(qrdata_t *data, const char *src, size_t len);
size_t qrdata_write_alnum_string(qrdata_t *data, const char *src, size_t len);
size_t qrdata_write_string(qrdata_t *data, const char *src, size_t len);
Expand Down
3 changes: 1 addition & 2 deletions src/qrean.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,8 +554,7 @@ static size_t qrean_try_write_qr_data(qrean_t *qrean, const void *buffer, size_t
writer = qrdata_write_8bit_string;
break;
case QREAN_DATA_TYPE_KANJI:
writer = qrdata_write_8bit_string;
// writer = qrdata_write_kanji_string; // TODO
writer = qrdata_write_kanji_string;
break;
}

Expand Down
Loading

0 comments on commit ee9969f

Please sign in to comment.