mirror of
https://github.com/FourierTransformer/ftcsv.git
synced 2024-12-16 05:24:21 +00:00
code refactor, can now modify headers via function, and slight performance increase
This commit is contained in:
parent
8565c1e6a4
commit
a15d50ec8b
10
README.md
10
README.md
@ -60,6 +60,16 @@ ftcsv.parse("apple,banana,carrot", ",", {loadFromString=true, headers=false})
|
|||||||
local actual = ftcsv.parse("a,b,c\r\napple,banana,carrot\r\n", ",", options)
|
local actual = ftcsv.parse("a,b,c\r\napple,banana,carrot\r\n", ",", options)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- `headerFunc`
|
||||||
|
|
||||||
|
Applies a function to every field in the header. If you are using `rename`, the function is applied after the rename.
|
||||||
|
|
||||||
|
Ex: making all fields uppercase
|
||||||
|
```lua
|
||||||
|
local options = {loadFromString=true, headerFunc=string.upper}
|
||||||
|
local actual = ftcsv.parse("a,b,c\napple,banana,carrot", ",", options)
|
||||||
|
```
|
||||||
|
|
||||||
- `headers`
|
- `headers`
|
||||||
|
|
||||||
Set `headers` to `false` if the file you are reading doesn't have any headers. This will cause ftcsv to create indexed tables rather than key-value tables for the output.
|
Set `headers` to `false` if the file you are reading doesn't have any headers. This will cause ftcsv to create indexed tables rather than key-value tables for the output.
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
package = "ftcsv"
|
package = "ftcsv"
|
||||||
version = "1.0.3-1"
|
version = "1.1.0-1"
|
||||||
|
|
||||||
source = {
|
source = {
|
||||||
url = "git://github.com/FourierTransformer/ftcsv.git",
|
url = "git://github.com/FourierTransformer/ftcsv.git",
|
||||||
tag = "1.0.3"
|
tag = "1.1.0"
|
||||||
}
|
}
|
||||||
|
|
||||||
description = {
|
description = {
|
311
ftcsv.lua
311
ftcsv.lua
@ -1,5 +1,5 @@
|
|||||||
local ftcsv = {
|
local ftcsv = {
|
||||||
_VERSION = 'ftcsv 1.0.3',
|
_VERSION = 'ftcsv 1.1.0',
|
||||||
_DESCRIPTION = 'CSV library for Lua',
|
_DESCRIPTION = 'CSV library for Lua',
|
||||||
_URL = 'https://github.com/FourierTransformer/ftcsv',
|
_URL = 'https://github.com/FourierTransformer/ftcsv',
|
||||||
_LICENSE = [[
|
_LICENSE = [[
|
||||||
@ -104,108 +104,61 @@ local function loadFile(textFile)
|
|||||||
end
|
end
|
||||||
|
|
||||||
-- creates a new field and adds it to the main table
|
-- creates a new field and adds it to the main table
|
||||||
local function createNewField(inputString, quote, fieldStart, i, line, fieldNum, doubleQuoteEscape, fieldsToKeep)
|
local function createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
|
||||||
-- print(lineNum, fieldNum, fieldStart, i-1)
|
local field
|
||||||
-- so, if we just recently de-escaped, we don't want the trailing \"
|
-- so, if we just recently de-escaped, we don't want the trailing \"
|
||||||
-- if fieldsToKeep == nil then
|
if sbyte(inputString, i-1) == quote then
|
||||||
-- local fieldsToKeep = fieldsToKeep
|
-- print("Skipping last \"")
|
||||||
-- print(fieldNum)
|
field = ssub(inputString, fieldStart, i-2)
|
||||||
-- print(fieldsToKeep[fieldNum])
|
else
|
||||||
if fieldsToKeep == nil or fieldsToKeep[fieldNum] then
|
field = ssub(inputString, fieldStart, i-1)
|
||||||
-- print(fieldsToKeep)
|
|
||||||
-- print("b4", i, fieldNum, line[fieldNum])
|
|
||||||
if sbyte(inputString, i-1) == quote then
|
|
||||||
-- print("Skipping last \"")
|
|
||||||
line[fieldNum] = ssub(inputString, fieldStart, i-2)
|
|
||||||
else
|
|
||||||
line[fieldNum] = ssub(inputString, fieldStart, i-1)
|
|
||||||
end
|
|
||||||
-- print("aft", i, fieldNum, line[fieldNum])
|
|
||||||
-- remove the double quotes (if they existed)
|
|
||||||
if doubleQuoteEscape then
|
|
||||||
-- print("QUOTE REPLACE")
|
|
||||||
-- print(line[fieldNum])
|
|
||||||
line[fieldNum] = line[fieldNum]:gsub('""', '"')
|
|
||||||
return false
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
if doubleQuoteEscape then
|
||||||
|
-- print("QUOTE REPLACE")
|
||||||
-- creates the headers after reading through to the first line
|
-- print(line[fieldNum])
|
||||||
local function createHeaders(line, rename)
|
field = field:gsub('""', '"')
|
||||||
-- print("CREATING HEADERS")
|
|
||||||
local headers = {}
|
|
||||||
for i = 1, #line do
|
|
||||||
if rename[line[i]] then
|
|
||||||
-- print("RENAMING", line[i], rename[line[i]])
|
|
||||||
headers[i] = rename[line[i]]
|
|
||||||
else
|
|
||||||
headers[i] = line[i]
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
return headers, 0, true
|
return field
|
||||||
end
|
end
|
||||||
|
|
||||||
-- main function used to parse
|
-- main function used to parse
|
||||||
function ftcsv.parse(inputFile, delimiter, options)
|
local function parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
|
||||||
-- each line in outResults holds another table
|
|
||||||
local outResults = {}
|
|
||||||
outResults[1] = {}
|
|
||||||
|
|
||||||
-- delimiter MUST be one character
|
|
||||||
assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")
|
|
||||||
local delimiterByte = sbyte(delimiter)
|
|
||||||
|
|
||||||
-- OPTIONS yo
|
|
||||||
local header = true
|
|
||||||
local rename = {}
|
|
||||||
local fieldsToKeep = nil
|
|
||||||
local ofieldsToKeep = nil
|
|
||||||
local loadFromString = false
|
|
||||||
if options then
|
|
||||||
if options.headers ~= nil then
|
|
||||||
assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
|
|
||||||
header = options.headers
|
|
||||||
end
|
|
||||||
if options.rename ~= nil then
|
|
||||||
assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
|
|
||||||
rename = options.rename
|
|
||||||
end
|
|
||||||
if options.fieldsToKeep ~= nil then
|
|
||||||
assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
|
|
||||||
ofieldsToKeep = options.fieldsToKeep
|
|
||||||
if header == false then
|
|
||||||
assert(next(rename) ~= nil, "ftcsv can only have fieldsToKeep for header-less files when they have been renamed. Please add the 'rename' option and try again.")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if options.loadFromString ~= nil then
|
|
||||||
assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
|
|
||||||
loadFromString = options.loadFromString
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
local inputString
|
|
||||||
if loadFromString then
|
|
||||||
inputString = inputFile
|
|
||||||
else
|
|
||||||
inputString = loadFile(inputFile)
|
|
||||||
end
|
|
||||||
|
|
||||||
local CR = sbyte("\r")
|
|
||||||
local LF = sbyte("\n")
|
|
||||||
local quote = sbyte("\"")
|
|
||||||
local doubleQuoteEscape = false
|
|
||||||
local fieldStart = 1
|
|
||||||
local fieldNum = 1
|
|
||||||
local lineNum = 1
|
|
||||||
local skipChar = 0
|
|
||||||
local inputLength = #inputString
|
|
||||||
local headerField = {}
|
|
||||||
local headerSet = false
|
|
||||||
local i = 1
|
|
||||||
|
|
||||||
-- keep track of my chars!
|
-- keep track of my chars!
|
||||||
local currentChar, nextChar = sbyte(inputString, i), nil
|
local currentChar, nextChar = sbyte(inputString, i), nil
|
||||||
|
local skipChar = 0
|
||||||
|
local field
|
||||||
|
local fieldStart = i
|
||||||
|
local fieldNum = 1
|
||||||
|
local lineNum = 1
|
||||||
|
local doubleQuoteEscape = false
|
||||||
|
local exit = false
|
||||||
|
|
||||||
|
--bytes
|
||||||
|
local CR = sbyte("\r")
|
||||||
|
local LF = sbyte("\n")
|
||||||
|
local quote = sbyte('"')
|
||||||
|
local delimiterByte = sbyte(delimiter)
|
||||||
|
|
||||||
|
local assignValue
|
||||||
|
local outResults
|
||||||
|
-- the headers haven't been set yet.
|
||||||
|
-- aka this is the first run!
|
||||||
|
if headerField == nil then
|
||||||
|
-- print("this is for headers")
|
||||||
|
headerField = {}
|
||||||
|
assignValue = function()
|
||||||
|
headerField[fieldNum] = field
|
||||||
|
return true
|
||||||
|
end
|
||||||
|
else
|
||||||
|
-- print("this is for magic")
|
||||||
|
outResults = {}
|
||||||
|
outResults[1] = {}
|
||||||
|
assignValue = function()
|
||||||
|
outResults[lineNum][headerField[fieldNum]] = field
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
while i <= inputLength do
|
while i <= inputLength do
|
||||||
-- go by two chars at a time! currentChar is set at the bottom.
|
-- go by two chars at a time! currentChar is set at the bottom.
|
||||||
@ -230,13 +183,15 @@ function ftcsv.parse(inputFile, delimiter, options)
|
|||||||
|
|
||||||
-- create some fields if we can!
|
-- create some fields if we can!
|
||||||
elseif currentChar == delimiterByte then
|
elseif currentChar == delimiterByte then
|
||||||
-- for that first field
|
|
||||||
if not headerSet and lineNum == 1 then
|
|
||||||
headerField[fieldNum] = fieldNum
|
|
||||||
end
|
|
||||||
-- create the new field
|
-- create the new field
|
||||||
-- print(headerField[fieldNum])
|
-- print(headerField[fieldNum])
|
||||||
doubleQuoteEscape = createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep)
|
if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
|
||||||
|
field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
|
||||||
|
-- print("FIELD", field, "FIELDEND", headerField[fieldNum], lineNum)
|
||||||
|
-- outResults[headerField[fieldNum]][lineNum] = field
|
||||||
|
assignValue()
|
||||||
|
end
|
||||||
|
doubleQuoteEscape = false
|
||||||
|
|
||||||
fieldNum = fieldNum + 1
|
fieldNum = fieldNum + 1
|
||||||
fieldStart = i + 1
|
fieldStart = i + 1
|
||||||
@ -245,52 +200,37 @@ function ftcsv.parse(inputFile, delimiter, options)
|
|||||||
|
|
||||||
-- newline?!
|
-- newline?!
|
||||||
elseif ((currentChar == CR and nextChar == LF) or currentChar == LF) then
|
elseif ((currentChar == CR and nextChar == LF) or currentChar == LF) then
|
||||||
-- keep track of headers
|
if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
|
||||||
if not headerSet and lineNum == 1 then
|
-- create the new field
|
||||||
headerField[fieldNum] = fieldNum
|
field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
|
||||||
end
|
|
||||||
|
|
||||||
-- create the new field
|
-- outResults[headerField[fieldNum]][lineNum] = field
|
||||||
doubleQuoteEscape = createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep)
|
exit = assignValue()
|
||||||
|
if exit then
|
||||||
-- if we have headers then we gotta do something about it
|
if (currentChar == CR and nextChar == LF) then
|
||||||
if lineNum == 1 and not headerSet then
|
return headerField, i + 1
|
||||||
if ofieldsToKeep ~= nil then
|
else
|
||||||
fieldsToKeep = {}
|
return headerField, i
|
||||||
for j = 1, #ofieldsToKeep do
|
|
||||||
fieldsToKeep[ofieldsToKeep[j]] = true
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if header then
|
|
||||||
headerField, lineNum, headerSet = createHeaders(outResults[lineNum], rename)
|
|
||||||
else
|
|
||||||
-- files without headers, but with a rename need to be handled too!
|
|
||||||
if #rename > 0 then
|
|
||||||
for j = 1, math.max(#rename, #headerField) do
|
|
||||||
headerField[j] = rename[j]
|
|
||||||
-- this is an odd case of where there are certain fields to be kept
|
|
||||||
if fieldsToKeep == nil or fieldsToKeep[rename[j]] then
|
|
||||||
outResults[1][rename[j]] = outResults[1][j]
|
|
||||||
end
|
|
||||||
-- print("J", j)
|
|
||||||
outResults[1][j] = nil
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
doubleQuoteEscape = false
|
||||||
|
|
||||||
-- increment for new line
|
-- determine how line ends
|
||||||
lineNum = lineNum + 1
|
|
||||||
outResults[lineNum] = {}
|
|
||||||
fieldNum = 1
|
|
||||||
fieldStart = i + 1
|
|
||||||
-- print("fs:", fieldStart)
|
|
||||||
if (currentChar == CR and nextChar == LF) then
|
if (currentChar == CR and nextChar == LF) then
|
||||||
-- print("CRLF DETECTED")
|
-- print("CRLF DETECTED")
|
||||||
skipChar = 1
|
skipChar = 1
|
||||||
fieldStart = fieldStart + 1
|
fieldStart = fieldStart + 1
|
||||||
-- print("fs:", fieldStart)
|
-- print("fs:", fieldStart)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- increment for new line
|
||||||
|
lineNum = lineNum + 1
|
||||||
|
outResults[lineNum] = {}
|
||||||
|
fieldNum = 1
|
||||||
|
fieldStart = i + 1 + skipChar
|
||||||
|
-- print("fs:", fieldStart)
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
i = i + 1 + skipChar
|
i = i + 1 + skipChar
|
||||||
@ -302,10 +242,11 @@ function ftcsv.parse(inputFile, delimiter, options)
|
|||||||
skipChar = 0
|
skipChar = 0
|
||||||
end
|
end
|
||||||
|
|
||||||
-- if the line doesn't end happily (with a quote/newline), the last char will be forgotten.
|
-- create last new field
|
||||||
-- this should take care of that.
|
if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
|
||||||
createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep)
|
field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
|
||||||
-- end
|
assignValue()
|
||||||
|
end
|
||||||
|
|
||||||
-- clean up last line if it's weird (this happens when there is a CRLF newline at end of file)
|
-- clean up last line if it's weird (this happens when there is a CRLF newline at end of file)
|
||||||
-- doing a count gets it to pick up the oddballs
|
-- doing a count gets it to pick up the oddballs
|
||||||
@ -325,9 +266,101 @@ function ftcsv.parse(inputFile, delimiter, options)
|
|||||||
return outResults
|
return outResults
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- runs the show!
|
||||||
|
function ftcsv.parse(inputFile, delimiter, options)
|
||||||
|
-- delimiter MUST be one character
|
||||||
|
assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")
|
||||||
|
|
||||||
|
-- OPTIONS yo
|
||||||
|
local header = true
|
||||||
|
local rename
|
||||||
|
local fieldsToKeep = nil
|
||||||
|
local loadFromString = false
|
||||||
|
local headerFunc
|
||||||
|
if options then
|
||||||
|
if options.headers ~= nil then
|
||||||
|
assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
|
||||||
|
header = options.headers
|
||||||
|
end
|
||||||
|
if options.rename ~= nil then
|
||||||
|
assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
|
||||||
|
rename = options.rename
|
||||||
|
end
|
||||||
|
if options.fieldsToKeep ~= nil then
|
||||||
|
assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
|
||||||
|
local ofieldsToKeep = options.fieldsToKeep
|
||||||
|
if ofieldsToKeep ~= nil then
|
||||||
|
fieldsToKeep = {}
|
||||||
|
for j = 1, #ofieldsToKeep do
|
||||||
|
fieldsToKeep[ofieldsToKeep[j]] = true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if header == false then
|
||||||
|
assert(next(rename) ~= nil, "ftcsv can only have fieldsToKeep for header-less files when they have been renamed. Please add the 'rename' option and try again.")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if options.loadFromString ~= nil then
|
||||||
|
assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
|
||||||
|
loadFromString = options.loadFromString
|
||||||
|
end
|
||||||
|
if options.headerFunc ~= nil then
|
||||||
|
assert(type(options.headerFunc) == "function", "ftcsv only takes a function value for optional parameter 'headerFunc'. You passed in '" .. tostring(options.headerFunc) .. "' of type '" .. type(options.headerFunc) .. "'.")
|
||||||
|
headerFunc = options.headerFunc
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
-- handle input via string or file!
|
||||||
|
local inputString
|
||||||
|
if loadFromString then
|
||||||
|
inputString = inputFile
|
||||||
|
else
|
||||||
|
inputString = loadFile(inputFile)
|
||||||
|
end
|
||||||
|
local inputLength = #inputString
|
||||||
|
|
||||||
|
-- parse through the headers!
|
||||||
|
local headerField, i = parseString(inputString, inputLength, delimiter, 0)
|
||||||
|
i = i + 1 -- start at the next char
|
||||||
|
|
||||||
|
-- for files where there aren't headers!
|
||||||
|
if header == false then
|
||||||
|
i = 0
|
||||||
|
for j = 1, #headerField do
|
||||||
|
headerField[j] = j
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
-- rename fields as needed!
|
||||||
|
if rename then
|
||||||
|
-- basic rename (["a" = "apple"])
|
||||||
|
for j = 1, #headerField do
|
||||||
|
if rename[headerField[j]] then
|
||||||
|
-- print("RENAMING", headerField[j], rename[headerField[j]])
|
||||||
|
headerField[j] = rename[headerField[j]]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
-- files without headers, but with a rename need to be handled too!
|
||||||
|
if #rename > 0 then
|
||||||
|
for j = 1, #rename do
|
||||||
|
headerField[j] = rename[j]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
-- apply some sweet header manipulation
|
||||||
|
if headerFunc then
|
||||||
|
for j = 1, #headerField do
|
||||||
|
headerField[j] = headerFunc(headerField[j])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
local output = parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
|
||||||
|
return output
|
||||||
|
end
|
||||||
|
|
||||||
-- a function that delimits " to "", used by the writer
|
-- a function that delimits " to "", used by the writer
|
||||||
local function delimitField(field)
|
local function delimitField(field)
|
||||||
local field = tostring(field)
|
field = tostring(field)
|
||||||
if field:find('"') then
|
if field:find('"') then
|
||||||
return field:gsub('"', '""')
|
return field:gsub('"', '""')
|
||||||
else
|
else
|
||||||
|
@ -153,4 +153,14 @@ describe("csv features", function()
|
|||||||
assert.are.same(expected, actual)
|
assert.are.same(expected, actual)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it("should make things uppercase via headerFunc", function()
|
||||||
|
local expected = {}
|
||||||
|
expected[1] = {}
|
||||||
|
expected[1].A = "apple"
|
||||||
|
expected[1].B = "banana"
|
||||||
|
expected[1].C = "carrot"
|
||||||
|
local actual = ftcsv.parse("a,b,c\napple,banana,carrot", ",", {loadFromString=true, headerFunc=string.upper})
|
||||||
|
assert.are.same(expected, actual)
|
||||||
|
end)
|
||||||
|
|
||||||
end)
|
end)
|
Loading…
Reference in New Issue
Block a user