Skip to content

Commit

Permalink
added Table::read options
Browse files Browse the repository at this point in the history
  • Loading branch information
OriHoch committed Nov 30, 2017
1 parent 3748750 commit f97fd9d
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 47 deletions.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,29 @@ Optionally, specify a [CSV Dialect](https://frictionlessdata.io/specs/csv-dialec
$table = new Table("tests/fixtures/data.csv", null, ["delimiter" => ";"]);
```

The Table::read method returns all the data as an array. It also supports options that modify the reader's behavior.

```php
$table->read() // returns all the data as an array
```

read accepts an options parameter, for example:

```php
$table->read(["cast" => false, "limit" => 5])
```

The following options are available (the values are the default values):

```php
$table->read([
"keyed" => true, // flag to emit keyed rows
"extended" => false, // flag to emit extended rows
"cast" => true, // flag to disable data casting if false
"limit" => null, // integer limit of rows to return
]);
```

Additional methods and functionality

```php
Expand Down
93 changes: 55 additions & 38 deletions src/CsvDialect.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,21 @@ public function __construct($dialect = null)
}
}

public function parseRow($line)
/**
* Parses the csv row according to the csv dialect.
*
* Returns an array of fields parsed from the line
*
* In case of line termination inside an enclosed field, the last field will contain a ContinueEnclosedField object
*
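* @param string $line
* @param array|null $continueLine fields returned by a previous call whose last element is a ContinueEnclosedField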
*
* @return array
*
* @throws DataSourceException
* @throws \Exception
*/
public function parseRow($line, $continueLine = null)
{
// RFC4180 - Each record is located on a separate line, delimited by a line break (CRLF)
// Tabular Data - The line terminator character MUST be LF or CRLF
Expand All @@ -102,6 +116,15 @@ public function parseRow($line)
$fields = [];
$field = -1;
$lastCharPos = mb_strlen($line) - 1;
if ($continueLine) {
if (!is_a($continueLine[count($continueLine) - 1], 'frictionlessdata\\tableschema\\ContinueEnclosedField')) {
throw new \Exception('invalid continueLine');
}
unset($continueLine[count($continueLine) - 1]);
$fields = $continueLine;
$field = count($fields) - 1;
$enclosed = true;
}
for ($charPos = 0; $charPos < mb_strlen($line); ++$charPos) {
$char = mb_substr($line, $charPos, 1);
if ($enclosed === null) {
Expand All @@ -116,50 +139,40 @@ public function parseRow($line)
++$field;
$fields[$field] = '';
}
continue;
} else {
++$field;
$fields[$field] = '';
if ($char == $this->dialect['quoteChar']) {
$enclosed = true;
continue;
} else {
$enclosed = false;
$fields[$field] .= $char;
continue;
}
}
} elseif ($enclosed) {
// processing an enclosed field
if ($this->dialect['doubleQuote'] !== null && $char == $this->dialect['quoteChar']) {
// encountered quote in doubleQuote mode
if ($charPos !== 0 && mb_substr($line, $charPos - 1, 1) == $this->dialect['quoteChar']) {
// previous char was also a double quote
// the quote was added in previous iteration, nothing to do here
continue;
} elseif ($charPos != $lastCharPos && mb_substr($line, $charPos + 1, 1) == $this->dialect['quoteChar']) {
// next char is a also a double quote - add a quote to the field
$fields[$field] .= $this->dialect['quoteChar'];
continue;
}
}
if ($this->dialect['escapeChar']) {
// handle escape chars
if ($char == $this->dialect['escapeChar']) {
// char is the escape char, add the escaped char to the string
if ($charPos === $lastCharPos) {
throw new DataSourceException('Encountered escape char at end of line');
} else {
$fields[$field] .= mb_substr($line, $charPos + 1, 1);
}
continue;
} elseif ($charPos != 0 && mb_substr($line, $charPos - 1, 1) == $this->dialect['escapeChar']) {
// previous char was the escape string
// added the char in previous iteration, nothing to do here
continue;
if (
$this->dialect['doubleQuote'] !== null && $char == $this->dialect['quoteChar']
&& $charPos != $lastCharPos && mb_substr($line, $charPos + 1, 1) == $this->dialect['quoteChar']
) {
// doubleQuote mode is active, current char is a quote and next char is a quote
$fields[$field] .= $this->dialect['quoteChar'];
// skip a char
++$charPos;
continue;
} elseif (
$this->dialect['escapeChar'] && $char === $this->dialect['escapeChar']
) {
// encountered escape char, add the escaped char to the string
if ($charPos === $lastCharPos) {
throw new DataSourceException('Encountered escape char at end of line');
} else {
$fields[$field] .= mb_substr($line, $charPos + 1, 1);
}
}
if ($char == $this->dialect['quoteChar']) {
// skip a char
++$charPos;
continue;
} elseif ($char == $this->dialect['quoteChar']) {
// encountered a quote signifying the end of the enclosed field
$enclosed = null;
continue;
Expand Down Expand Up @@ -193,15 +206,19 @@ public function parseRow($line)
}
}
}
if (count($fields) > 1 && mb_strlen($fields[count($fields) - 1]) == 0) {
throw new \Exception('Invalid csv file - line must not end with a comma');
}
if ($this->dialect['skipInitialSpace']) {
return array_map(function ($field) {
$fields = array_map(function ($field) {
return ltrim($field);
}, $fields);
} else {
return $fields;
}
if ($enclosed === true && !is_a($fields[count($fields) - 1], 'frictionlessdata\\tableschema\\ContinueEnclosedField')) {
$fields[$field + 1] = new ContinueEnclosedField();
}

return $fields;
}
}

/**
 * Marker object with no state or behavior.
 *
 * CsvDialect::parseRow appends an instance as the last element of the returned
 * fields when the line ends while an enclosed (quoted) field is still open.
 * Callers detect it with is_a() and pass the partial row back to parseRow
 * together with the next line so that parsing of the open field continues.
 */
class ContinueEnclosedField
{
}
15 changes: 12 additions & 3 deletions src/DataSources/CsvDataSource.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,12 @@ public function getNextLine()
$this->nextRow = null;
$colNum = 0;
$obj = [];
if (count($row) != count($this->headerRow)) {
throw new DataSourceException('Invalid row: '.implode(', ', $row));
}
foreach ($this->headerRow as $fieldName) {
$obj[$fieldName] = $row[$colNum++];
$obj[$fieldName] = $row[$colNum];
++$colNum;
}

return $obj;
Expand Down Expand Up @@ -168,7 +172,7 @@ public function save($outputDataSource)
*
* @throws DataSourceException
*/
protected function getRow()
protected function getRow($continueRow = null)
{
++$this->curRowNum;
try {
Expand All @@ -177,6 +181,11 @@ protected function getRow()
throw new DataSourceException($e->getMessage(), $this->curRowNum);
}

return $this->csvDialect->parseRow($line);
$row = $this->csvDialect->parseRow($line, $continueRow);
if (count($row) > 0 && is_a($row[count($row) - 1], 'frictionlessdata\\tableschema\\ContinueEnclosedField')) {
return $this->getRow($row);
} else {
return $row;
}
}
}
40 changes: 37 additions & 3 deletions src/Table.php
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,45 @@ public function headers($numPeekRows = 10)
return array_keys($this->schema->fields());
}

public function read()
public function read($options = null)
{
$options = array_merge([
'keyed' => true,
'extended' => false,
'cast' => true,
'limit' => null,
], $options ? $options : []);
$rows = [];
foreach ($this as $row) {
$rows[] = $row;
$rowNum = 0;
if ($options['extended']) {
$headers = $this->headers($options['limit'] ? $options['limit'] : null);
}
if (!$options['cast']) {
$this->dataSource->open();
while (!$this->dataSource->isEof()) {
$row = $this->dataSource->getNextLine();
if ($options['extended']) {
$rows[] = [$rowNum, $headers, array_values($row)];
} else {
$rows[] = $row;
}
if ($options['limit'] && $options['limit'] > 0 && $rowNum + 1 >= $options['limit']) {
break;
}
++$rowNum;
}
} else {
foreach ($this as $row) {
if ($options['extended']) {
$rows[] = [$rowNum, $headers, array_values($row)];
} else {
$rows[] = $row;
}
if ($options['limit'] && $options['limit'] > 0 && $rowNum + 1 >= $options['limit']) {
break;
}
++$rowNum;
}
}

return $rows;
Expand Down
66 changes: 63 additions & 3 deletions tests/TableTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -234,17 +234,26 @@ public function testCsvDialectLolsv()
], $rows);
}

/**
 * Reads the data_linebreaks.csv fixture, whose quoted fields span several
 * physical lines, and expects it to come back as one keyed row.
 */
public function testCsvLineBreak()
{
    $expectedRows = [
        ['aaa' => 'test a', 'bbb' => 'test b', 'ccc' => 'test c'],
    ];
    $linebreaksTable = new Table($this->fixture('data_linebreaks.csv'));
    $actualRows = $linebreaksTable->read();
    $this->assertEquals($expectedRows, $actualRows);
}

public function testCsvDialectDatapackagePipelines()
{
$datapackage = json_decode(file_get_contents($this->fixture('committees/datapackage.json')));
$resource = $datapackage->resources[0];
$table = new Table($this->fixture('committees/kns_committee.csv'), $resource->schema, $resource->dialect);
$rows = [];
$rowNum = 0;
foreach ($table as $row) {
$rows[] = $row;
if (count($rows) == 2) {
break;
if (in_array($rowNum, [0, 1, 132])) {
$rows[] = $row;
}
++$rowNum;
}
$this->assertEquals([[
'CommitteeID' => 97,
Expand Down Expand Up @@ -280,9 +289,60 @@ public function testCsvDialectDatapackagePipelines()
'CommitteeParentName' => null,
'IsCurrent' => true,
'LastUpdatedDate' => Carbon::create(2015, 3, 20, 12, 2, 57),
], [
'CommitteeID' => 679,
'Name' => 'משותפת לכלכלה וחינוך לדיון בחוק הרשות השניה לטלויזיה ורדיו התש"ן-1990',
'CategoryID' => 317,
'CategoryDesc' => 'ועדה משותפת לכלכלה וחינוך לדיון בחוק הרשות השניה לטלוויזיה ורדיו, התש"ן-1990',
'KnessetNum' => 18,
'CommitteeTypeID' => 73,
'CommitteeTypeDesc' => 'ועדה משותפת',
'Email' => '[email protected]',
'StartDate' => Carbon::create(2009, 6, 30, 0, 0, 0),
'FinishDate' => null,
'AdditionalTypeID' => 991,
'AdditionalTypeDesc' => 'קבועה',
'ParentCommitteeID' => null,
'CommitteeParentName' => null,
'IsCurrent' => true,
'LastUpdatedDate' => Carbon::create(2015, 3, 20, 12, 2, 57),
]], $rows);
}

/**
 * Checks Table::read() with non-default options against the committees fixture.
 *
 * With 'extended' => true and 'cast' => false, each returned row is expected to
 * be a [row number, header names, uncast string values] triple; 'limit' => 2 is
 * expected to return only the first two data rows.
 */
public function testReadOptions()
{
// the schema and the CSV dialect both come from the fixture's datapackage.json
$datapackage = json_decode(file_get_contents($this->fixture('committees/datapackage.json')));
$resource = $datapackage->resources[0];
$table = new Table($this->fixture('committees/kns_committee.csv'), $resource->schema, $resource->dialect);
$this->assertEquals([
// first data row: row number 0, header names, then values left as strings
[
0,
[
'CommitteeID', 'Name', 'CategoryID', 'CategoryDesc', 'KnessetNum', 'CommitteeTypeID',
'CommitteeTypeDesc', 'Email', 'StartDate', 'FinishDate', 'AdditionalTypeID',
'AdditionalTypeDesc', 'ParentCommitteeID', 'CommitteeParentName', 'IsCurrent', 'LastUpdatedDate',
], [
'97', 'ה"ח המדיניות הכלכלית לשנת הכספים 2004', '', '', '16', '73', 'ועדה משותפת', '',
'2004-08-12 00:00:00', '', '',
'', '', '', 'True', '2015-03-20 12:02:57',
],
],
// second data row: row number 1, with the same header list repeated
[
1,
[
'CommitteeID', 'Name', 'CategoryID', 'CategoryDesc', 'KnessetNum', 'CommitteeTypeID',
'CommitteeTypeDesc', 'Email', 'StartDate', 'FinishDate', 'AdditionalTypeID',
'AdditionalTypeDesc', 'ParentCommitteeID', 'CommitteeParentName', 'IsCurrent', 'LastUpdatedDate',
],
[
'314', 'המיוחדת לענין לקחי אסון גשר המכביה', '', '', '14', '72', 'ועדה מיוחדת', '',
'1988-10-19 00:00:00', '', '992',
'מיוחדת', '', '', 'True', '2015-03-20 12:02:57',
],
],
], $table->read(['keyed' => false, 'extended' => true, 'cast' => false, 'limit' => 2]));
}

protected $fixturesPath;
protected $validSchema;

Expand Down
4 changes: 4 additions & 0 deletions tests/fixtures/data_linebreaks.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
aaa,bbb,ccc
"test a
","test b","test c
"

0 comments on commit f97fd9d

Please sign in to comment.