mirror of
https://github.com/FourierTransformer/ftcsv.git
synced 2024-11-19 19:54:23 +00:00
Make the delimiter optional in the encoder (#45)
This commit is contained in:
parent
11f1c6e437
commit
e6324c4014
56
README.md
56
README.md
@ -17,7 +17,7 @@ luarocks install ftcsv
|
|||||||
There are two main parsing methods: `ftcsv.parse` and `ftcsv.parseLine`.
|
There are two main parsing methods: `ftcsv.parse` and `ftcsv.parseLine`.
|
||||||
`ftcsv.parse` loads the entire file and parses it, while `ftcsv.parseLine` is an iterator that parses one line at a time.
|
`ftcsv.parse` loads the entire file and parses it, while `ftcsv.parseLine` is an iterator that parses one line at a time.
|
||||||
|
|
||||||
### `ftcsv.parse(fileName, [, options])`
|
### `ftcsv.parse(fileName [, options])`
|
||||||
`ftcsv.parse` will load the entire csv file into memory, then parse it in one go, returning a lua table with the parsed data and a lua table containing the column headers. It has only one required parameter - the file name. A few optional parameters can be passed in via a table (examples below).
|
`ftcsv.parse` will load the entire csv file into memory, then parse it in one go, returning a lua table with the parsed data and a lua table containing the column headers. It has only one required parameter - the file name. A few optional parameters can be passed in via a table (examples below).
|
||||||
|
|
||||||
Just loading a csv file:
|
Just loading a csv file:
|
||||||
@ -26,7 +26,7 @@ local ftcsv = require('ftcsv')
|
|||||||
local zipcodes, headers = ftcsv.parse("free-zipcode-database.csv")
|
local zipcodes, headers = ftcsv.parse("free-zipcode-database.csv")
|
||||||
```
|
```
|
||||||
|
|
||||||
### `ftcsv.parseLine(fileName, [, options])`
|
### `ftcsv.parseLine(fileName [, options])`
|
||||||
`ftcsv.parseLine` will open a file and read `options.bufferSize` bytes of the file. `bufferSize` defaults to 2^16 bytes (which provides the fastest parsing on most unix-based systems), or can be specified in the options. `ftcsv.parseLine` is an iterator and returns one line at a time. When all the lines in the buffer are read, it will read in another `bufferSize` bytes of a file and repeat the process until the entire file has been read.
|
`ftcsv.parseLine` will open a file and read `options.bufferSize` bytes of the file. `bufferSize` defaults to 2^16 bytes (which provides the fastest parsing on most unix-based systems), or can be specified in the options. `ftcsv.parseLine` is an iterator and returns one line at a time. When all the lines in the buffer are read, it will read in another `bufferSize` bytes of a file and repeat the process until the entire file has been read.
|
||||||
|
|
||||||
If specifying `bufferSize` there are a couple of things to remember:
|
If specifying `bufferSize` there are a couple of things to remember:
|
||||||
@ -48,7 +48,7 @@ end
|
|||||||
The options are the same for `parseLine` and `parse`, with the exception of `loadFromString` and `bufferSize`. `loadFromString` only works with `parse` and `bufferSize` can only be specified for `parseLine`.
|
The options are the same for `parseLine` and `parse`, with the exception of `loadFromString` and `bufferSize`. `loadFromString` only works with `parse` and `bufferSize` can only be specified for `parseLine`.
|
||||||
|
|
||||||
The following are optional parameters passed in via the third argument as a table.
|
The following are optional parameters passed in via the third argument as a table.
|
||||||
- `delimeter`
|
- `delimiter`
|
||||||
|
|
||||||
If your file doesn't use the comma character as the delimiter, you can specify your own. It is limited to one character and defaults to `,`
|
If your file doesn't use the comma character as the delimiter, you can specify your own. It is limited to one character and defaults to `,`
|
||||||
```lua
|
```lua
|
||||||
@ -131,23 +131,36 @@ ftcsv.parse("apple,banana,carrot", {loadFromString=true, headers=false})
|
|||||||
```
|
```
|
||||||
|
|
||||||
## Encoding
|
## Encoding
|
||||||
### `ftcsv.encode(inputTable, delimiter[, options])`
|
### `ftcsv.encode(inputTable [, options])`
|
||||||
|
|
||||||
`ftcsv.encode` takes in a lua table and turns it into a text string that can be written to a file. It has two required parameters, an inputTable and a delimiter. You can use it to write out a file like this:
|
`ftcsv.encode` takes in a lua table and turns it into a text string that can be written to a file. You can use it to write out a file like this:
|
||||||
```lua
|
```lua
|
||||||
local fileOutput = ftcsv.encode(users, ",")
|
local users = {
|
||||||
|
{name="alice", fruit="apple"},
|
||||||
|
{name="bob", fruit="banana"},
|
||||||
|
{name="eve", fruit="pear"}
|
||||||
|
}
|
||||||
|
local fileOutput = ftcsv.encode(users)
|
||||||
local file = assert(io.open("ALLUSERS.csv", "w"))
|
local file = assert(io.open("ALLUSERS.csv", "w"))
|
||||||
file:write(fileOutput)
|
file:write(fileOutput)
|
||||||
file:close()
|
file:close()
|
||||||
```
|
```
|
||||||
|
|
||||||
### Options
|
### Options
|
||||||
|
- `delimiter`
|
||||||
|
|
||||||
|
by default the encoder uses a `,` as a delimiter. The delimiter can be changed by setting a value for `delimiter`
|
||||||
|
|
||||||
|
```lua
|
||||||
|
local output = ftcsv.encode(everyUser, {delimiter="\t"})
|
||||||
|
```
|
||||||
|
|
||||||
- `fieldsToKeep`
|
- `fieldsToKeep`
|
||||||
|
|
||||||
if `fieldsToKeep` is set in the encode process, only the fields specified will be written out to a file. The `fieldsToKeep` will be written out in the order that is specified.
|
if `fieldsToKeep` is set in the encode process, only the fields specified will be written out to a file. The `fieldsToKeep` will be written out in the order that is specified.
|
||||||
|
|
||||||
```lua
|
```lua
|
||||||
local output = ftcsv.encode(everyUser, ",", {fieldsToKeep={"Name", "Phone", "City"}})
|
local output = ftcsv.encode(everyUser, {fieldsToKeep={"Name", "Phone", "City"}})
|
||||||
```
|
```
|
||||||
|
|
||||||
- `onlyRequiredQuotes`
|
- `onlyRequiredQuotes`
|
||||||
@ -155,13 +168,29 @@ file:close()
|
|||||||
if `onlyRequiredQuotes` is set to `true`, the output will only include quotes around fields that contain quotes, have newlines, or contain the delimiter.
|
if `onlyRequiredQuotes` is set to `true`, the output will only include quotes around fields that contain quotes, have newlines, or contain the delimiter.
|
||||||
|
|
||||||
```lua
|
```lua
|
||||||
local output = ftcsv.encode(everyUser, ",", {onlyRequiredQuotes=true})
|
local output = ftcsv.encode(everyUser, {onlyRequiredQuotes=true})
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Error Handling
|
## Error Handling
|
||||||
ftcsv returns a litany of errors when passed a bad csv file or incorrect parameters. You can find a more detailed explanation of the more cryptic errors in [ERRORS.md](ERRORS.md)
|
ftcsv returns a litany of errors when passed a bad csv file or incorrect parameters. You can find a more detailed explanation of the more cryptic errors in [ERRORS.md](ERRORS.md)
|
||||||
|
|
||||||
|
## Delimiter no longer required from 1.4.0!
|
||||||
|
Starting with version 1.4.0, the delimiter is no longer required as the second argument. **But don't worry,** ftcsv remains backwards compatible! We check the argument types and adjust parsing as necessary. There is no intention to remove this backwards compatibility layer, so your existing code should just keep on working!
|
||||||
|
|
||||||
|
So this works just fine:
|
||||||
|
```lua
|
||||||
|
ftcsv.parse("a>b>c\r\n1,2,3", ">", {loadFromString=true})
|
||||||
|
```
|
||||||
|
|
||||||
|
as well as:
|
||||||
|
```lua
|
||||||
|
ftcsv.encode(users, ",")
|
||||||
|
```
|
||||||
|
|
||||||
|
The delimiter as the second argument will always take precedence if both are provided.
|
||||||
|
|
||||||
|
|
||||||
## Benchmarks
|
## Benchmarks
|
||||||
We ran ftcsv against a few different csv parsers ([PIL](http://www.lua.org/pil/20.4.html)/[csvutils](http://lua-users.org/wiki/CsvUtils), [lua_csv](https://github.com/geoffleyland/lua-csv), and [lpeg_josh](http://lua-users.org/lists/lua-l/2009-08/msg00020.html)) for lua and here is what we found:
|
We ran ftcsv against a few different csv parsers ([PIL](http://www.lua.org/pil/20.4.html)/[csvutils](http://lua-users.org/wiki/CsvUtils), [lua_csv](https://github.com/geoffleyland/lua-csv), and [lpeg_josh](http://lua-users.org/lists/lua-l/2009-08/msg00020.html)) for lua and here is what we found:
|
||||||
|
|
||||||
@ -207,17 +236,6 @@ Feel free to create a new issue for any bugs you've found or help you need. If y
|
|||||||
8. Enjoy the changes made!
|
8. Enjoy the changes made!
|
||||||
|
|
||||||
|
|
||||||
## Delimiter no longer required as of 1.4.0!
|
|
||||||
Starting with version 1.4.0, the delimiter no longer required as the second argument. **But don't worry,** ftcsv remains backwards compatible! We check the argument types and adjust parsing as necessary. There is no intention to remove this backwards compatibility layer, so you can always enjoy your up-to-date lightning fast CSV parser!
|
|
||||||
|
|
||||||
So this works just fine:
|
|
||||||
```lua
|
|
||||||
ftcsv.parse("a>b>c\r\n1,2,3", ">", {loadFromString=true})
|
|
||||||
```
|
|
||||||
|
|
||||||
The delimiter as the second argument will always take precedent if both are provided.
|
|
||||||
|
|
||||||
|
|
||||||
## Licenses
|
## Licenses
|
||||||
- The main library is licensed under the MIT License. Feel free to use it!
|
- The main library is licensed under the MIT License. Feel free to use it!
|
||||||
- Some of the test CSVs are from [csv-spectrum](https://github.com/maxogden/csv-spectrum) (BSD-2-Clause) which includes some from [csvkit](https://github.com/wireservice/csvkit) (MIT License)
|
- Some of the test CSVs are from [csv-spectrum](https://github.com/maxogden/csv-spectrum) (BSD-2-Clause) which includes some from [csvkit](https://github.com/wireservice/csvkit) (MIT License)
|
||||||
|
@ -804,6 +804,7 @@ end
|
|||||||
|
|
||||||
-- works really quickly with luajit-2.1, because table.concat life
|
-- works really quickly with luajit-2.1, because table.concat life
|
||||||
function ftcsv.encode(inputTable, delimiter, options)
|
function ftcsv.encode(inputTable, delimiter, options)
|
||||||
|
local delimiter, options = determineArgumentOrder(delimiter, options)
|
||||||
local output, headers = initializeGenerator(inputTable, delimiter, options)
|
local output, headers = initializeGenerator(inputTable, delimiter, options)
|
||||||
|
|
||||||
for i, line in csvLineGenerator(inputTable, delimiter, headers, options) do
|
for i, line in csvLineGenerator(inputTable, delimiter, headers, options) do
|
||||||
|
@ -502,7 +502,7 @@ describe("csv features", function()
|
|||||||
assert.are.same(expected, actual)
|
assert.are.same(expected, actual)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it("should handle ignoring the single quote without specifying the delimeter", function()
|
it("should handle ignoring the single quote without specifying the delimiter", function()
|
||||||
local expected = {}
|
local expected = {}
|
||||||
expected[1] = {}
|
expected[1] = {}
|
||||||
expected[1].a = '"apple'
|
expected[1].a = '"apple'
|
||||||
@ -524,7 +524,7 @@ describe("csv features", function()
|
|||||||
assert.are.same(expected, actual)
|
assert.are.same(expected, actual)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it("should handle reusing the options without specifying the delimeter", function()
|
it("should handle reusing the options without specifying the delimiter", function()
|
||||||
local expected = {}
|
local expected = {}
|
||||||
expected[1] = {}
|
expected[1] = {}
|
||||||
expected[1].a = '"apple'
|
expected[1].a = '"apple'
|
||||||
|
@ -76,13 +76,29 @@ describe("csv encode", function()
|
|||||||
it("should handle " .. value, function()
|
it("should handle " .. value, function()
|
||||||
local jsonFile = loadFile("spec/json/" .. value .. ".json")
|
local jsonFile = loadFile("spec/json/" .. value .. ".json")
|
||||||
local jsonDecode = cjson.decode(jsonFile)
|
local jsonDecode = cjson.decode(jsonFile)
|
||||||
-- local parse = staecsv:ftcsv(contents, ",")
|
|
||||||
local reEncoded = ftcsv.parse(ftcsv.encode(jsonDecode, ","), ",", {loadFromString=true})
|
local reEncoded = ftcsv.parse(ftcsv.encode(jsonDecode, ","), ",", {loadFromString=true})
|
||||||
-- local f = csv.openstring(contents, {separator=",", header=true})
|
assert.are.same(jsonDecode, reEncoded)
|
||||||
-- local parse = {}
|
end)
|
||||||
-- for fields in f:lines() do
|
end
|
||||||
-- parse[#parse+1] = fields
|
end)
|
||||||
-- end
|
|
||||||
|
describe("csv encode without a delimiter", function()
|
||||||
|
for _, value in ipairs(files) do
|
||||||
|
it("should handle " .. value, function()
|
||||||
|
local jsonFile = loadFile("spec/json/" .. value .. ".json")
|
||||||
|
local jsonDecode = cjson.decode(jsonFile)
|
||||||
|
local reEncoded = ftcsv.parse(ftcsv.encode(jsonDecode), ",", {loadFromString=true})
|
||||||
|
assert.are.same(jsonDecode, reEncoded)
|
||||||
|
end)
|
||||||
|
end
|
||||||
|
end)
|
||||||
|
|
||||||
|
describe("csv encode with a delimiter specified in options", function()
|
||||||
|
for _, value in ipairs(files) do
|
||||||
|
it("should handle " .. value, function()
|
||||||
|
local jsonFile = loadFile("spec/json/" .. value .. ".json")
|
||||||
|
local jsonDecode = cjson.decode(jsonFile)
|
||||||
|
local reEncoded = ftcsv.parse(ftcsv.encode(jsonDecode, {delimiter="\t"}), {delimiter="\t", loadFromString=true})
|
||||||
assert.are.same(jsonDecode, reEncoded)
|
assert.are.same(jsonDecode, reEncoded)
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
@ -93,13 +109,7 @@ describe("csv encode without quotes", function()
|
|||||||
it("should handle " .. value, function()
|
it("should handle " .. value, function()
|
||||||
local jsonFile = loadFile("spec/json/" .. value .. ".json")
|
local jsonFile = loadFile("spec/json/" .. value .. ".json")
|
||||||
local jsonDecode = cjson.decode(jsonFile)
|
local jsonDecode = cjson.decode(jsonFile)
|
||||||
-- local parse = staecsv:ftcsv(contents, ",")
|
|
||||||
local reEncodedNoQuotes = ftcsv.parse(ftcsv.encode(jsonDecode, ",", {onlyRequiredQuotes=true}), ",", {loadFromString=true})
|
local reEncodedNoQuotes = ftcsv.parse(ftcsv.encode(jsonDecode, ",", {onlyRequiredQuotes=true}), ",", {loadFromString=true})
|
||||||
-- local f = csv.openstring(contents, {separator=",", header=true})
|
|
||||||
-- local parse = {}
|
|
||||||
-- for fields in f:lines() do
|
|
||||||
-- parse[#parse+1] = fields
|
|
||||||
-- end
|
|
||||||
assert.are.same(jsonDecode, reEncodedNoQuotes)
|
assert.are.same(jsonDecode, reEncodedNoQuotes)
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
Loading…
Reference in New Issue
Block a user