Skip to content

Commit

Permalink
[DAPHNE-daphne-eu#770] Refactor getLine() to avoid potential memory leak
Browse files Browse the repository at this point in the history
According to [1], the use of the C function getLine() will automatically malloc() the required memory if NULL is passed as the first parameter. Since we constantly pass NULL in a loop without ever free()-ing the returned line this constitutes a memory leak. This commit changes the behaviour to always pass the same pointer that is now contained in the File struct. With this the malloc() happens once, the memory is subsequently realloc()-ed if needed and the pointer is passed to free() when closing the file.

[1] https://linux.die.net/man/3/getline
  • Loading branch information
psomas authored and corepointer committed Jun 28, 2024
1 parent 47e2621 commit c3266c0
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 45 deletions.
34 changes: 25 additions & 9 deletions src/runtime/local/io/File.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
struct File {
FILE *identifier;
unsigned long pos;
unsigned long read;
long read;
char *line;
size_t line_len;
};

inline struct File *openMemFile(FILE *ident){
Expand All @@ -32,6 +34,9 @@ inline struct File *openMemFile(FILE *ident){
f->identifier = ident;
f->pos = 0;

f->line = NULL;
f->line_len = 0;

return f;
}

Expand All @@ -43,6 +48,10 @@ inline struct File *openFile(const char *filename) {

if (f->identifier == NULL)
return NULL;

f->line = NULL;
f->line_len = 0;

return f;
}

Expand All @@ -54,19 +63,26 @@ inline struct File *openFileForWrite(const char *filename) {

if (f->identifier == NULL)
return NULL;

f->line = NULL;
f->line_len = 0;

return f;
}

inline void closeFile(File *f) { fclose(f->identifier); }

inline char *getLine(File *f) {
char *line = NULL;
size_t len = 0;
inline void closeFile(File *f) {
fclose(f->identifier);
if (f->line) {
free(f->line);
}
}

f->read = getline(&line, &len, f->identifier);
f->pos += f->read;
inline ssize_t getFileLine(File *f) {
ssize_t ret = getline(&f->line, &f->line_len, f->identifier);
f->read = ret;
f->pos += ret;

return line;
return ret;
}

#endif
40 changes: 26 additions & 14 deletions src/runtime/local/io/MMFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ inline int mm_is_valid(MM_typecode matcode)
#define MM_PATTERN_STR "pattern"

inline int mm_read_banner(File *f, MM_typecode *matcode){
char *line;
char banner[MM_MAX_TOKEN_LENGTH];
char mtx[MM_MAX_TOKEN_LENGTH];
char crd[MM_MAX_TOKEN_LENGTH];
Expand All @@ -142,11 +141,13 @@ inline int mm_read_banner(File *f, MM_typecode *matcode){


mm_clear_typecode(matcode);
line = getLine(f);
ssize_t ret = getFileLine(f);
if ((ssize_t)f->read == EOF)
return MM_PREMATURE_EOF;
if (ret == -1)
throw std::runtime_error("mm_read_banner: getFileLine failed");

if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
if (sscanf(f->line, "%s %s %s %s %s", banner, mtx, crd, data_type,
storage_scheme) != 5)
return MM_PREMATURE_EOF;

Expand Down Expand Up @@ -221,8 +222,13 @@ inline int mm_read_mtx_crd_size(File *f, size_t *M, size_t *N, size_t *nz )
*M = *N = *nz = 0;
do
{
num_items_read = sscanf(getLine(f), "%lu %lu %lu", M, N, nz);
if ((ssize_t)f->read == EOF) return MM_PREMATURE_EOF;
ssize_t ret = getFileLine(f);
if ((ssize_t)f->read == EOF)
return MM_PREMATURE_EOF;
if (ret == -1)
throw std::runtime_error("mm_read_mtx_crd_size: getFileLine failed");

num_items_read = sscanf(f->line, "%lu %lu %lu", M, N, nz);
} while (num_items_read != 3);

return 0;
Expand All @@ -235,8 +241,12 @@ inline int mm_read_mtx_array_size(File *f, size_t *M, size_t *N)
*M = *N = 0;
do
{
num_items_read = sscanf(getLine(f), "%lu %lu", M, N);
if ((ssize_t)f->read == EOF) return MM_PREMATURE_EOF;
ssize_t ret = getFileLine(f);
if ((ssize_t)f->read == EOF)
return MM_PREMATURE_EOF;
if (ret == -1)
throw std::runtime_error("mm_read_mtx_array_size: getFileLine failed");
num_items_read = sscanf(f->line, "%lu %lu", M, N);
} while (num_items_read != 2);

return 0;
Expand Down Expand Up @@ -305,29 +315,31 @@ class MMFile
pointer m_ptr = (pointer)malloc(sizeof(Entry)*2), next = m_ptr+1;
bool do_next = false;
MMFile<VT> &file;
char *line{};
size_t r = 0, c = 0;
std::function<void()> progress = [&]() mutable { r = 0; c++; };
VT cur;
MMIterator() = default;
void readEntry(){
//TODO: Handle arbitrary blank lines
line = getLine(file.f);
ssize_t ret = getFileLine(file.f);
if((ssize_t)file.f->read == -1){
terminate();
return;
}
if (ret == -1)
throw std::runtime_error("MMIterator::readEntry: getFileLine failed");

size_t pos = 0;
if(mm_is_coordinate(file.typecode)){
//Assumes only single space between values
r = std::stoi(line) - 1;
while(line[pos++] != ' ');
c = std::stoi(line + pos) - 1;
while(line[pos] != ' ' && line[pos] != '\n') pos++;
r = std::stoi(file.f->line) - 1;
while(file.f->line[pos++] != ' ');
c = std::stoi(file.f->line + pos) - 1;
while(file.f->line[pos] != ' ' && file.f->line[pos] != '\n') pos++;
}

if(!mm_is_pattern(file.typecode))
convertCstr(line + pos, &cur);
convertCstr(file.f->line + pos, &cur);

*m_ptr = {r, c, cur};
if(mm_is_symmetric(file.typecode) && r != c){
Expand Down
45 changes: 24 additions & 21 deletions src/runtime/local/io/ReadCsvFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ template <typename VT> struct ReadCsvFile<DenseMatrix<VT>> {
res = DataObjectFactory::create<DenseMatrix<VT>>(numRows, numCols, false);
}

char *line;
size_t cell = 0;
VT * valuesRes = res->getValues();

for(size_t r = 0; r < numRows; r++) {
line = getLine(file);
if (getFileLine(file) == -1)
throw std::runtime_error("ReadCsvFile::apply: getFileLine failed");
// TODO Assuming that the given numRows is available, this should never
// happen.
// if (line == NULL)
Expand All @@ -109,7 +109,7 @@ template <typename VT> struct ReadCsvFile<DenseMatrix<VT>> {
size_t pos = 0;
for(size_t c = 0; c < numCols; c++) {
VT val;
convertCstr(line + pos, &val);
convertCstr(file->line + pos, &val);

// TODO This assumes that rowSkip == numCols.
valuesRes[cell++] = val;
Expand All @@ -119,7 +119,7 @@ template <typename VT> struct ReadCsvFile<DenseMatrix<VT>> {
// we wouldn't have to search for that ourselves, just would need to
// check if it is really the delimiter.
if(c < numCols - 1) {
while(line[pos] != delim) pos++;
while(file->line[pos] != delim) pos++;
pos++; // skip delimiter
}
}
Expand Down Expand Up @@ -169,17 +169,17 @@ template <typename VT> struct ReadCsvFile<CSRMatrix<VT>> {
auto *colIdxs = res->getColIdxs();
auto *values = res->getValues();

char *line;
size_t pos;
uint64_t row;
uint64_t col;
for (size_t i = 0; i < numNonZeros; ++i) {
line = getLine(file);
convertCstr(line, &row);
if (getFileLine(file) == -1)
throw std::runtime_error("ReadCOOSorted::apply: getFileLine failed");
convertCstr(file->line, &row);
pos = 0;
while(line[pos] != delim) pos++;
while(file->line[pos] != delim) pos++;
pos++; // skip delimiter
convertCstr(line + pos, &col);
convertCstr(file->line + pos, &col);

rowOffsets[row + 1] += 1;
values[i] = 1;
Expand Down Expand Up @@ -251,7 +251,6 @@ template <> struct ReadCsvFile<Frame> {
res = DataObjectFactory::create<Frame>(numRows, numCols, schema, nullptr, false);
}

char *line;
size_t row = 0, col = 0;

uint8_t ** rawCols = new uint8_t * [numCols];
Expand All @@ -262,51 +261,55 @@ template <> struct ReadCsvFile<Frame> {
}

while (1) {
line = getLine(file);
if (line == NULL)
ssize_t ret = getFileLine(file);
if (file->read == EOF)
break;
if (file->line == NULL)
break;
if (ret == -1)
throw std::runtime_error("ReadCsvFile::apply: getFileLine failed");

size_t pos = 0;
while (1) {
switch (colTypes[col]) {
case ValueTypeCode::SI8:
int8_t val_si8;
convertCstr(line + pos, &val_si8);
convertCstr(file->line + pos, &val_si8);
reinterpret_cast<int8_t *>(rawCols[col])[row] = val_si8;
break;
case ValueTypeCode::SI32:
int32_t val_si32;
convertCstr(line + pos, &val_si32);
convertCstr(file->line + pos, &val_si32);
reinterpret_cast<int32_t *>(rawCols[col])[row] = val_si32;
break;
case ValueTypeCode::SI64:
int64_t val_si64;
convertCstr(line + pos, &val_si64);
convertCstr(file->line + pos, &val_si64);
reinterpret_cast<int64_t *>(rawCols[col])[row] = val_si64;
break;
case ValueTypeCode::UI8:
uint8_t val_ui8;
convertCstr(line + pos, &val_ui8);
convertCstr(file->line + pos, &val_ui8);
reinterpret_cast<uint8_t *>(rawCols[col])[row] = val_ui8;
break;
case ValueTypeCode::UI32:
uint32_t val_ui32;
convertCstr(line + pos, &val_ui32);
convertCstr(file->line + pos, &val_ui32);
reinterpret_cast<uint32_t *>(rawCols[col])[row] = val_ui32;
break;
case ValueTypeCode::UI64:
uint64_t val_ui64;
convertCstr(line + pos, &val_ui64);
convertCstr(file->line + pos, &val_ui64);
reinterpret_cast<uint64_t *>(rawCols[col])[row] = val_ui64;
break;
case ValueTypeCode::F32:
float val_f32;
convertCstr(line + pos, &val_f32);
convertCstr(file->line + pos, &val_f32);
reinterpret_cast<float *>(rawCols[col])[row] = val_f32;
break;
case ValueTypeCode::F64:
double val_f64;
convertCstr(line + pos, &val_f64);
convertCstr(file->line + pos, &val_f64);
reinterpret_cast<double *>(rawCols[col])[row] = val_f64;
break;
default:
Expand All @@ -321,7 +324,7 @@ template <> struct ReadCsvFile<Frame> {
// return a pointer to the first character after the parsed input, then
// we wouldn't have to search for that ourselves, just would need to
// check if it is really the delimiter.
while(line[pos] != delim) pos++;
while(file->line[pos] != delim) pos++;
pos++; // skip delimiter
}

Expand Down
3 changes: 2 additions & 1 deletion src/runtime/local/io/ReadParquet.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ inline struct File *arrowToCsv(const char *filename){

FILE *buf = fmemopen(ccsv, csv.size(), "r");
struct File *file = openMemFile(buf);
getLine(file); // Parquet has headers, readCsv does not expect that.
if (getFileLine(file) == -1) // Parquet has headers, readCsv does not expect that.
throw std::runtime_error("arrowToCsv: getFileLine failed");

return file;
}
Expand Down

0 comments on commit c3266c0

Please sign in to comment.