From a3c1641df350ad6957bb3e02b39f9500185152f8 Mon Sep 17 00:00:00 2001
From: Konrad Siek
Date: Thu, 13 May 2021 17:41:45 +0200
Subject: [PATCH] Various changes, mostly config. Some attempts at better vector typing.

---
 ufos/src/Makevars                        |   4 +-
 ufos/src/ufos.c                          |   2 +-
 ufovectors/.vscode/c_cpp_properties.json |   4 +-
 ufovectors/.vscode/settings.json         |   3 +-
 ufovectors/Makevars                      |   2 +
 ufovectors/R/ufovectors.R                |   1 -
 ufovectors/src/Makevars                  |   2 +-
 ufovectors/src/rrr.c                     | 253 +++++++++++++++++++++++
 ufovectors/src/rrr.h                     |  63 ++++++
 9 files changed, 326 insertions(+), 8 deletions(-)
 create mode 100644 ufovectors/Makevars
 create mode 100644 ufovectors/src/rrr.c
 create mode 100644 ufovectors/src/rrr.h

diff --git a/ufos/src/Makevars b/ufos/src/Makevars
index 28f40ba..630b323 100644
--- a/ufos/src/Makevars
+++ b/ufos/src/Makevars
@@ -3,7 +3,7 @@ MkInclude = $(R_HOME)/etc${R_ARCH}/Makeconf
 #PKG_CXXFLAGS =
 #PKG_CPPFLAGS = -pthread
 PKG_CFLAGS = -DMAKE_SURE
-PKG_LIBS = -pthread -LmappedMemory -lmappedMemory -lcrypto -g -O2 -Wall -Werror
+PKG_LIBS = -pthread -LmappedMemory -lmappedMemory -lcrypto -g -O0 -Wall -Werror
 CFLAGS = -O0 -ggdb -fpic -Wall -Werror
 #-Og -ggdb -Wall -Werror -fPIC
 
@@ -35,4 +35,4 @@ subclean:
 	done
 
 init.o: init.c ufos.h mappedMemory/userfaultCore.h
-ufoTest.o: ufos.c ufos.h mappedMemory/userfaultCore.h
\ No newline at end of file
+ufoTest.o: ufos.c ufos.h mappedMemory/userfaultCore.h
diff --git a/ufos/src/ufos.c b/ufos/src/ufos.c
index 7675f70..26d3776 100644
--- a/ufos/src/ufos.c
+++ b/ufos/src/ufos.c
@@ -244,7 +244,7 @@ SEXP ufo_new_multidim(ufo_source_t* source) {
 SEXP is_ufo(SEXP x) {
     SEXP/*LGLSXP*/ response = PROTECT(allocVector(LGLSXP, 1));
     if(ufIsObject(__ufo_system, x)) {
-        SET_LOGICAL_ELT(response, 0, 1);
+        SET_LOGICAL_ELT(response, 0, 1);//TODO Rtrue and Rfalse
     } else {
         SET_LOGICAL_ELT(response, 0, 0);
     }
diff --git a/ufovectors/.vscode/c_cpp_properties.json b/ufovectors/.vscode/c_cpp_properties.json
index 2d65646..92eb157 100644
--- a/ufovectors/.vscode/c_cpp_properties.json
+++ b/ufovectors/.vscode/c_cpp_properties.json
@@ -5,8 +5,8 @@
             "includePath": [
                 "${workspaceFolder}/**",
                 //"/usr/include/linux/**",
-                "/opt/R-4.0.3/include/**",
-                "/opt/R-4.0.3/src/include/**"
+                "/opt/R-4.0.5/include/**",
+                "/opt/R-4.0.5/src/include/**"
             ],
             "defines": [],
             "compilerPath": "/usr/bin/gcc",
diff --git a/ufovectors/.vscode/settings.json b/ufovectors/.vscode/settings.json
index c0de4ad..6e6feed 100644
--- a/ufovectors/.vscode/settings.json
+++ b/ufovectors/.vscode/settings.json
@@ -4,7 +4,7 @@
         "*.c": "c",
         "type_traits": "c"
     },
-    "R.dir": "/opt/R-4.0.3",
+    "R.dir": "/opt/R-4.0.5",
     "R.cmd": "${config:R.dir}/bin/R" ,
     "R.binary": "${config:R.dir}/bin/exec/R" ,
     "R.script": "${config:R.dir}/bin/Rscript",
@@ -22,4 +22,5 @@
         { "name": "R_binary", "value": "${config:R.binary}"},
         { "name": "R_ARCH", "value": ""}
     ],
+    "cmake.configureOnOpen": true,
 }
\ No newline at end of file
diff --git a/ufovectors/Makevars b/ufovectors/Makevars
new file mode 100644
index 0000000..f749e85
--- /dev/null
+++ b/ufovectors/Makevars
@@ -0,0 +1,2 @@
+CFLAGS = -O0
+
diff --git a/ufovectors/R/ufovectors.R b/ufovectors/R/ufovectors.R
index 764ceb4..1959e53 100644
--- a/ufovectors/R/ufovectors.R
+++ b/ufovectors/R/ufovectors.R
@@ -162,7 +162,6 @@ ufo_matrix_bin <- function(type, path, rows, cols, min_load_count = 0, add_class
 ufo_csv <- function(path, min_load_count = 0, check_names=T, header=T,
                     record_row_offsets_at_interval=1000, initial_buffer_size=32,
                     col_names, add_class = .check_add_class()) {
-    browser()
     .expect_exactly_one(min_load_count)
     .expect_exactly_one(header)
     .expect_exactly_one(check_names)
diff --git a/ufovectors/src/Makevars b/ufovectors/src/Makevars
index 0270edd..6976dfb 100644
--- a/ufovectors/src/Makevars
+++ b/ufovectors/src/Makevars
@@ -1,3 +1,3 @@
 PKG_CFLAGS = -O0 -ggdb -DUSE_R_STUFF -DMAKE_SURE -Wall
-SOURCES_C = init.c ufo_vectors.c ufo_empty.c helpers.c debug.c csv/token.c csv/tokenizer.c csv/reader.c bin/io.c ufo_csv.c csv/string_vector.c csv/string_set.c evil/bad_strings.c ufo_operators.c rash.c ufo_coerce.c
+SOURCES_C = init.c rrr.c ufo_vectors.c ufo_empty.c helpers.c debug.c csv/token.c csv/tokenizer.c csv/reader.c bin/io.c ufo_csv.c csv/string_vector.c csv/string_set.c evil/bad_strings.c ufo_operators.c rash.c ufo_coerce.c
 OBJECTS = $(SOURCES_C:.c=.o)
diff --git a/ufovectors/src/rrr.c b/ufovectors/src/rrr.c
new file mode 100644
index 0000000..c2a598c
--- /dev/null
+++ b/ufovectors/src/rrr.c
@@ -0,0 +1,253 @@
+#include "rrr.h"
+
+#include "make_sure.h"
+
+// NOTE(review): make_sure(cond, fn, ...) is assumed to call fn(...) when cond
+// is false (an assertion), as its type and bounds checks in this file imply.
+
+// True when the one-based index holds NA_INTEGER.
+bool is_one_nint_index_na(one_nint_index_t index) {
+    return NA_INTEGER == index.noix;
+}
+
+// True when the zero-based index holds NA_INTEGER.
+bool is_zero_nint_index_na(zero_nint_index_t index) {
+    return NA_INTEGER == index.nzix;
+}
+
+// Converts a one-based index to zero-based; NA maps to NA. Any other value
+// must be positive, otherwise Rf_error is raised.
+zero_nint_index_t convert_one_nint_index_to_zero_nint_index(one_nint_index_t index) {
+    if (is_one_nint_index_na(index)) {
+        zero_nint_index_t result = { .nzix = NA_INTEGER };
+        return result;
+    } else {
+        make_sure(index.noix > 0, Rf_error,
+                  "Invalid one-based index value: %d", index.noix);
+        zero_nint_index_t result = { .nzix = index.noix - 1 };
+        return result;
+    }
+}
+
+// Wraps a raw int as a zero-based, possibly-NA index (no validation).
+zero_nint_index_t zero_index_from_int(int i) {
+    zero_nint_index_t index = { .nzix = i };
+    return index;
+}
+
+// Wraps a raw int as a one-based, possibly-NA index (no validation).
+one_nint_index_t one_index_from_int(int i) {
+    one_nint_index_t index = { .noix = i };
+    return index;
+}
+
+// Converts a clean one-based index to zero-based. Requiring a positive value
+// rejects 0 and negatives, and also NA_INTEGER (INT_MIN in R), so the
+// subtraction below cannot overflow.
+zero_cint_index_t convert_one_cint_index_to_zero_cint_index(one_cint_index_t index) {
+    make_sure(index.oix > 0, Rf_error,
+              "Invalid one-based index value: %d", index.oix);
+    zero_cint_index_t result = { .zix = index.oix - 1 };
+    return result;
+}
+
+// Reads one element by a clean zero-based index, raising Rf_error when the
+// index lies outside [0, length). R_xlen_t may be wider than int, so values
+// are cast to long long to match the format specifiers.
+int integer_vector_zero_indexed_clean_get(integer_vector_t vector, zero_cint_index_t index) {
+    R_xlen_t vector_index = (R_xlen_t) index.zix;
+    R_xlen_t vector_length = XLENGTH(vector.int_vector);
+    make_sure(vector_index >= 0 && vector_index < vector_length, Rf_error,
+              "Index out of bounds: %lld is not in [0, %lld).", (long long) vector_index, (long long) vector_length);
+    return INTEGER_ELT(vector.int_vector, vector_index);
+}
+
+// Same bounds-checked read, but for a vector of one-based indices: the
+// element read is wrapped as a one_nint_index_t.
+one_nint_index_t one_indexing_integer_vector_zero_indexed_clean_get(one_indexing_integer_vector_t vector, zero_cint_index_t index) {
+    R_xlen_t vector_index = (R_xlen_t) index.zix;
+    R_xlen_t vector_length = XLENGTH(vector.one_nint_index_vector);
+    make_sure(vector_index >= 0 && vector_index < vector_length, Rf_error,
+              "Index out of bounds: %lld is not in [0, %lld).", (long long) vector_index, (long long) vector_length);
+    int value = INTEGER_ELT(vector.one_nint_index_vector, vector_index);
+    return one_index_from_int(value);
+}
+
+// Reads one element by a one-based index; an NA index yields NA_INTEGER.
+int integer_vector_one_indexed_get(integer_vector_t vector, one_nint_index_t index) {
+    if (is_one_nint_index_na(index)) {
+        return NA_INTEGER;
+    }
+    one_cint_index_t clean_index = { .oix = index.noix };
+    zero_cint_index_t clean_zero_index =
+        convert_one_cint_index_to_zero_cint_index(clean_index);
+    return integer_vector_zero_indexed_clean_get(vector, clean_zero_index);
+}
+
+// Reads one index element by a one-based index; an NA index yields an NA index.
+one_nint_index_t one_indexing_integer_vector_one_indexed_get(one_indexing_integer_vector_t vector, one_nint_index_t index) {
+    if (is_one_nint_index_na(index)) {
+        return one_index_from_int(NA_INTEGER);
+    }
+    one_cint_index_t clean_index = { .oix = index.noix };
+    zero_cint_index_t clean_zero_index =
+        convert_one_cint_index_to_zero_cint_index(clean_index);
+    return one_indexing_integer_vector_zero_indexed_clean_get(vector, clean_zero_index);
+}
+
+// Reads one element by a zero-based index; an NA index yields NA_INTEGER.
+int integer_vector_zero_indexed_get(integer_vector_t vector, zero_nint_index_t index) {
+    if (is_zero_nint_index_na(index)) {
+        return NA_INTEGER;
+    }
+    zero_cint_index_t clean_zero_index = { .zix = index.nzix };
+    return integer_vector_zero_indexed_clean_get(vector, clean_zero_index);
+}
+
+// Reads one index element by a zero-based index; an NA index yields an NA index.
+one_nint_index_t one_indexing_integer_vector_zero_indexed_get(one_indexing_integer_vector_t vector, zero_nint_index_t index) {
+    if (is_zero_nint_index_na(index)) {
+        return one_index_from_int(NA_INTEGER);
+    }
+    zero_cint_index_t clean_zero_index = { .zix = index.nzix };
+    return one_indexing_integer_vector_zero_indexed_clean_get(vector, clean_zero_index);
+}
+
+
+// Wraps an INTSXP as integer_vector_t; any other SEXP type raises Rf_error.
+integer_vector_t integer_vector_from(SEXP/*INTSXP*/ sexp) {
+    make_sure(TYPEOF(sexp) == INTSXP, Rf_error,
+              "Expecting INTSXP vector, but found %s vector",
+              type2char(TYPEOF(sexp)));
+    integer_vector_t vector = { .int_vector = sexp };
+    return vector;
+}
+
+// Wraps an INTSXP as a vector of one-based indices; other types raise Rf_error.
+one_indexing_integer_vector_t one_indexing_integer_vector_from(SEXP/*INTSXP*/ sexp) {
+    make_sure(TYPEOF(sexp) == INTSXP, Rf_error,
+              "Expecting INTSXP vector, but found %s vector",
+              type2char(TYPEOF(sexp)));
+    // TODO check validity of each element (>0)?
+    one_indexing_integer_vector_t vector = { .one_nint_index_vector = sexp };
+    return vector;
+}
+
+// Number of elements in the wrapped vector.
+R_xlen_t integer_vector_length(integer_vector_t vector) {
+    return XLENGTH(vector.int_vector);
+}
+
+// Number of elements in the wrapped index vector. Name and parameter type
+// match the declaration in rrr.h.
+R_xlen_t one_indexing_integer_vector_length(one_indexing_integer_vector_t vector) {
+    return XLENGTH(vector.one_nint_index_vector);
+}
+
+// NOTE(review): everything below is a commented-out earlier draft that uses a
+// different naming scheme; kept verbatim.
+
+// zero_based_not_na_int_index_t cast_zero_based_na_int_index_to_not_na_int_index(zero_based_na_int_index_t index) {
+//     make_sure(!zero_based_int_index_is_na(index), Rf_error,
+//               "Cannot cast index with value NA to zero_based_not_na_int_index_t");
+//     zero_based_not_na_int_index_t not_na_index = { .zix = index.nzix };
+//     return not_na_index;
+// }
+
+// one_based_not_na_int_index_t cast_one_based_na_int_index_to_not_na_int_index(one_based_na_int_index_t index) {
+//     make_sure(!one_based_int_index_is_na(index), Rf_error,
+//               "Cannot cast index with value NA to zero_based_not_na_int_index_t");
+//     one_based_not_na_int_index_t not_na_index = { .oix = index.noix };
+//     return not_na_index;
+// }
+
+// zero_based_na_int_index_t cast_zero_based_not_na_int_index_to_na_int_index(zero_based_not_na_int_index_t index) {
+//     zero_based_na_int_index_t na_index = { .nzix = index.zix };
+//     return na_index;
+// }
+
+// one_based_na_int_index_t cast_one_based_not_na_int_index_to_na_int_index(one_based_not_na_int_index_t index) {
+//     make_sure(!one_based_int_index_is_na(index), Rf_error,
+//               "Cannot cast index with value NA to zero_based_not_na_int_index_t");
+//     one_based_na_int_index_t na_index = { .noix = index.oix };
+//     return na_index;
+// }
+
+// one_based_na_int_index_t from_zero_to_one_based_int_index(zero_based_na_int_index_t index) {
+//     if (zero_based_int_index_is_na(index)) {
+//         one_based_na_int_index_t result = { .noix = NA_INTEGER };
+//         return result;
+//     } else {
+//         zero_based_not_na_int_index_t not_na_index =
+//             cast_zero_based_na_int_index_to_not_na_int_index(index);
+//         return from_zero_to_one_based_int_index_not_na(not_na_index);
+//     }
+// }
+
+// zero_based_na_int_index_t from_one_to_zero_based_int_index(one_based_na_int_index_t index) {
+//     if (one_based_int_index_is_na(index)) {
+//         zero_based_na_int_index_t result = { .nzix = NA_INTEGER };
+//         return result;
+//     } else {
+//         one_based_not_na_int_index_t not_na_index =
+//             cast_one_based_na_int_index_to_not_na_int_index(index);
+//         zero_based_not_na_int_index_t not_na_result =
+//             from_one_to_zero_based_int_index_not_na(not_na_index);
+//         return cast_zero_based_not_na_int_index_to_na_int_index(not_na_result);
+//     }
+// }
+
+// one_based_na_int_index_t from_zero_to_one_based_int_index_not_na(zero_based_not_na_int_index_t index) {
+//     one_based_na_int_index_t result = { .noix = index.zix + 1 };
+//     make_sure(index.zix == NA_INTEGER, Rf_warning,
+//               "Zero-based integer index %d becomes NA "
+//               "after converting to one-based integer index.",
+//               index.zix);
+//     return result;
+// }
+
+// zero_based_not_na_int_index_t from_one_to_zero_based_int_index_not_na(one_based_not_na_int_index_t index) {
+//     make_sure(index.oix == 0, Rf_error, "One-based integer index has value 0.");
+//     zero_based_not_na_int_index_t result = { .zix = index.oix - 1 };
+//     return result;
+// }
+
+// bool one_based_int_index_is_na(one_based_na_int_index_t index) {
+//     return NA_INTEGER == index.noix;
+// }
+
+// bool zero_based_int_index_is_na(zero_based_na_int_index_t index) {
+//     return NA_INTEGER == index.nzix;
+// }
+
+// int get_int_element_by_zero_based_na_int_index(SEXP/*INTSXP*/ vector, zero_based_na_int_index_t index) {
+//     if (zero_based_int_index_is_na(index)) {
+//         return NA_INTEGER;
+//     }
+//     R_xlen_t vector_length = XLENGTH(vector);
+//     R_xlen_t vector_index = (R_xlen_t) index.nzix;
+//     make_sure(vector_index < vector_length, Rf_error,
+//               "Index out of bounds %d >= %d.", vector_index, vector_length);
+//     return INTEGER_ELT(vector, vector_index);
+// }
+
+// int get_int_element_by_zero_based_not_na_int_index(SEXP/*INTSXP*/ vector, zero_based_not_na_int_index_t index) {
+//     R_xlen_t vector_length = XLENGTH(vector);
+//     R_xlen_t vector_index = (R_xlen_t) index.zix;
+//     make_sure(vector_index < vector_length, Rf_error,
+//               "Index out of bounds %d >= %d.", vector_index, vector_length);
+//     return INTEGER_ELT(vector, vector_index);
+// }
+
+// int get_int_element_by_one_based_int_index(SEXP/*INTSXP*/ vector, one_based_int_index_t index) {
+//     if (one_based_int_index_is_na(index)) {
+//         return NA_INTEGER;
+//     }
+//     zero_based_int_index_t actual_index =
+//         from_one_to_zero_based_int_index_assume_not_na(index);
+//     R_xlen_t vector_index =
+//         from_zero_based_int_index_to_xlen(actual_index);
+//     make_sure(vector_index < vector_length, Rf_error,
+//               "Index out of bounds %d >= %d.", vector_index, vector_length); //TODO make sure vector_index> 0
+//     return INTEGER_ELT(vector, vector_index);
+// }
\ No newline at end of file
diff --git a/ufovectors/src/rrr.h b/ufovectors/src/rrr.h
new file mode 100644
index 0000000..8f0957a
--- /dev/null
+++ b/ufovectors/src/rrr.h
@@ -0,0 +1,63 @@
+#pragma once
+
+// NOTE(review): the three header names below were missing from the reviewed
+// copy of this patch; they are restored from the identifiers used - confirm.
+#include <stdbool.h>
+
+#define USE_RINTERNALS
+#include <R.h>
+#include <Rinternals.h>
+
+/*
+ * RRR TYPE ENCODINGS
+ *
+ * nint                 integer, may contain NA_INTEGER
+ * cint                 integer, may not contain NA_INTEGER (i.e. clean)
+ * nxlen                R_xlen_t, may contain NA_INTEGER
+ * cxlen                R_xlen_t, may not contain NA_INTEGER
+ *
+ * zero_nint_index_t    0-based integer index, may contain NA_INTEGER
+ * zero_cint_index_t    0-based integer index, may not contain NA_INTEGER
+ * zero_nxlen_index_t   0-based R_xlen_t index, may contain NA_INTEGER
+ * zero_cxlen_index_t   0-based R_xlen_t index, may not contain NA_INTEGER
+ *
+ * one_nint_index_t     1-based integer index, may contain NA_INTEGER
+ * one_cint_index_t     1-based integer index, may not contain NA_INTEGER
+ * one_nxlen_index_t    1-based R_xlen_t index, may contain NA_INTEGER
+ * one_cxlen_index_t    1-based R_xlen_t index, may not contain NA_INTEGER
+ */
+
+typedef struct zero_nint_index_struct { int nzix; } zero_nint_index_t;
+typedef struct zero_cint_index_struct { int zix;  } zero_cint_index_t;
+typedef struct one_nint_index_struct  { int noix; } one_nint_index_t;
+typedef struct one_cint_index_struct  { int oix;  } one_cint_index_t;
+
+zero_nint_index_t zero_index_from_int(int);
+one_nint_index_t one_index_from_int(int);
+
+
+typedef struct { SEXP/*INTSXP*/  int_vector;     } integer_vector_t; /*int*/
+typedef struct { SEXP/*REALSXP*/ double_vector;  } real_vector_t;    /*double*/
+typedef struct { SEXP/*LGLSXP*/  boolean_vector; } logical_vector_t; /*Rboolean*/
+typedef struct { SEXP/*RAWSXP*/  byte_vector;    } raw_vector_t;     /*Rbyte*/
+typedef struct { SEXP/*STRSXP*/  charsxp_vector; } string_vector_t;  /*CHARSXP*/
+typedef struct { SEXP/*CPLXSXP*/ complex_vector; } complex_vector_t; /*Rcomplex*/
+typedef struct { SEXP/*VECSXP*/  generic_vector; } generic_vector_t; /*SEXP*/
+
+// REALSXP containing R_xlen_t values encoded as doubles
+typedef struct { SEXP/*REALSXP*/ xlen_vector; } xlen_vector_t; /*double/R_xlen_t*/
+
+typedef struct { SEXP/*INTSXP*/ one_nint_index_vector; } one_indexing_integer_vector_t; /*int/one_nint_index*/
+
+
+integer_vector_t integer_vector_from(SEXP/*INTSXP*/ sexp);
+R_xlen_t integer_vector_length(integer_vector_t vector);
+
+int integer_vector_one_indexed_get(integer_vector_t vector, one_nint_index_t index);
+int integer_vector_zero_indexed_get(integer_vector_t vector, zero_nint_index_t index);
+
+one_indexing_integer_vector_t one_indexing_integer_vector_from(SEXP/*INTSXP*/ sexp);
+R_xlen_t one_indexing_integer_vector_length(one_indexing_integer_vector_t vector);
+
+one_nint_index_t one_indexing_integer_vector_one_indexed_get(one_indexing_integer_vector_t vector, one_nint_index_t index);
+one_nint_index_t one_indexing_integer_vector_zero_indexed_get(one_indexing_integer_vector_t vector, zero_nint_index_t index);
\ No newline at end of file