code refactor, can now modify headers via function, and slight performance increase

This commit is contained in:
FourierTransformer 2016-04-02 15:42:32 -05:00
parent f2083dd38b
commit ee24dcd9a6
4 changed files with 194 additions and 141 deletions

View File

@ -60,6 +60,16 @@ ftcsv.parse("apple,banana,carrot", ",", {loadFromString=true, headers=false})
local actual = ftcsv.parse("a,b,c\r\napple,banana,carrot\r\n", ",", options) local actual = ftcsv.parse("a,b,c\r\napple,banana,carrot\r\n", ",", options)
``` ```
- `headerFunc`
Applies a function to every field in the header. If you are using `rename`, the function is applied after the rename.
Ex: making all fields uppercase
```lua
local options = {loadFromString=true, headerFunc=string.upper}
local actual = ftcsv.parse("a,b,c\napple,banana,carrot", ",", options)
```
- `headers` - `headers`
Set `headers` to `false` if the file you are reading doesn't have any headers. This will cause ftcsv to create indexed tables rather than key-value tables for the output. Set `headers` to `false` if the file you are reading doesn't have any headers. This will cause ftcsv to create indexed tables rather than key-value tables for the output.

View File

@ -1,9 +1,9 @@
package = "ftcsv" package = "ftcsv"
version = "1.0.3-1" version = "1.1.0-1"
source = { source = {
url = "git://github.com/FourierTransformer/ftcsv.git", url = "git://github.com/FourierTransformer/ftcsv.git",
tag = "1.0.3" tag = "1.1.0"
} }
description = { description = {

295
ftcsv.lua
View File

@ -1,5 +1,5 @@
local ftcsv = { local ftcsv = {
_VERSION = 'ftcsv 1.0.3', _VERSION = 'ftcsv 1.1.0',
_DESCRIPTION = 'CSV library for Lua', _DESCRIPTION = 'CSV library for Lua',
_URL = 'https://github.com/FourierTransformer/ftcsv', _URL = 'https://github.com/FourierTransformer/ftcsv',
_LICENSE = [[ _LICENSE = [[
@ -104,108 +104,61 @@ local function loadFile(textFile)
end end
-- creates a new field and adds it to the main table -- creates a new field and adds it to the main table
local function createNewField(inputString, quote, fieldStart, i, line, fieldNum, doubleQuoteEscape, fieldsToKeep) local function createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
-- print(lineNum, fieldNum, fieldStart, i-1) local field
-- so, if we just recently de-escaped, we don't want the trailing \" -- so, if we just recently de-escaped, we don't want the trailing \"
-- if fieldsToKeep == nil then
-- local fieldsToKeep = fieldsToKeep
-- print(fieldNum)
-- print(fieldsToKeep[fieldNum])
if fieldsToKeep == nil or fieldsToKeep[fieldNum] then
-- print(fieldsToKeep)
-- print("b4", i, fieldNum, line[fieldNum])
if sbyte(inputString, i-1) == quote then if sbyte(inputString, i-1) == quote then
-- print("Skipping last \"") -- print("Skipping last \"")
line[fieldNum] = ssub(inputString, fieldStart, i-2) field = ssub(inputString, fieldStart, i-2)
else else
line[fieldNum] = ssub(inputString, fieldStart, i-1) field = ssub(inputString, fieldStart, i-1)
end end
-- print("aft", i, fieldNum, line[fieldNum])
-- remove the double quotes (if they existed)
if doubleQuoteEscape then if doubleQuoteEscape then
-- print("QUOTE REPLACE") -- print("QUOTE REPLACE")
-- print(line[fieldNum]) -- print(line[fieldNum])
line[fieldNum] = line[fieldNum]:gsub('""', '"') field = field:gsub('""', '"')
return false
end end
end return field
end
-- creates the headers after reading through to the first line
local function createHeaders(line, rename)
-- print("CREATING HEADERS")
local headers = {}
for i = 1, #line do
if rename[line[i]] then
-- print("RENAMING", line[i], rename[line[i]])
headers[i] = rename[line[i]]
else
headers[i] = line[i]
end
end
return headers, 0, true
end end
-- main function used to parse -- main function used to parse
function ftcsv.parse(inputFile, delimiter, options) local function parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
-- each line in outResults holds another table
local outResults = {}
outResults[1] = {}
-- delimiter MUST be one character
assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")
local delimiterByte = sbyte(delimiter)
-- OPTIONS yo
local header = true
local rename = {}
local fieldsToKeep = nil
local ofieldsToKeep = nil
local loadFromString = false
if options then
if options.headers ~= nil then
assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
header = options.headers
end
if options.rename ~= nil then
assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
rename = options.rename
end
if options.fieldsToKeep ~= nil then
assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
ofieldsToKeep = options.fieldsToKeep
if header == false then
assert(next(rename) ~= nil, "ftcsv can only have fieldsToKeep for header-less files when they have been renamed. Please add the 'rename' option and try again.")
end
end
if options.loadFromString ~= nil then
assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
loadFromString = options.loadFromString
end
end
local inputString
if loadFromString then
inputString = inputFile
else
inputString = loadFile(inputFile)
end
local CR = sbyte("\r")
local LF = sbyte("\n")
local quote = sbyte("\"")
local doubleQuoteEscape = false
local fieldStart = 1
local fieldNum = 1
local lineNum = 1
local skipChar = 0
local inputLength = #inputString
local headerField = {}
local headerSet = false
local i = 1
-- keep track of my chars! -- keep track of my chars!
local currentChar, nextChar = sbyte(inputString, i), nil local currentChar, nextChar = sbyte(inputString, i), nil
local skipChar = 0
local field
local fieldStart = i
local fieldNum = 1
local lineNum = 1
local doubleQuoteEscape = false
local exit = false
--bytes
local CR = sbyte("\r")
local LF = sbyte("\n")
local quote = sbyte('"')
local delimiterByte = sbyte(delimiter)
local assignValue
local outResults
-- the headers haven't been set yet.
-- aka this is the first run!
if headerField == nil then
-- print("this is for headers")
headerField = {}
assignValue = function()
headerField[fieldNum] = field
return true
end
else
-- print("this is for magic")
outResults = {}
outResults[1] = {}
assignValue = function()
outResults[lineNum][headerField[fieldNum]] = field
end
end
while i <= inputLength do while i <= inputLength do
-- go by two chars at a time! currentChar is set at the bottom. -- go by two chars at a time! currentChar is set at the bottom.
@ -230,13 +183,15 @@ function ftcsv.parse(inputFile, delimiter, options)
-- create some fields if we can! -- create some fields if we can!
elseif currentChar == delimiterByte then elseif currentChar == delimiterByte then
-- for that first field
if not headerSet and lineNum == 1 then
headerField[fieldNum] = fieldNum
end
-- create the new field -- create the new field
-- print(headerField[fieldNum]) -- print(headerField[fieldNum])
doubleQuoteEscape = createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep) if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
-- print("FIELD", field, "FIELDEND", headerField[fieldNum], lineNum)
-- outResults[headerField[fieldNum]][lineNum] = field
assignValue()
end
doubleQuoteEscape = false
fieldNum = fieldNum + 1 fieldNum = fieldNum + 1
fieldStart = i + 1 fieldStart = i + 1
@ -245,52 +200,37 @@ function ftcsv.parse(inputFile, delimiter, options)
-- newline?! -- newline?!
elseif ((currentChar == CR and nextChar == LF) or currentChar == LF) then elseif ((currentChar == CR and nextChar == LF) or currentChar == LF) then
-- keep track of headers if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
if not headerSet and lineNum == 1 then
headerField[fieldNum] = fieldNum
end
-- create the new field -- create the new field
doubleQuoteEscape = createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep) field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
-- if we have headers then we gotta do something about it -- outResults[headerField[fieldNum]][lineNum] = field
if lineNum == 1 and not headerSet then exit = assignValue()
if ofieldsToKeep ~= nil then if exit then
fieldsToKeep = {} if (currentChar == CR and nextChar == LF) then
for j = 1, #ofieldsToKeep do return headerField, i + 1
fieldsToKeep[ofieldsToKeep[j]] = true
end
end
if header then
headerField, lineNum, headerSet = createHeaders(outResults[lineNum], rename)
else else
-- files without headers, but with a rename need to be handled too! return headerField, i
if #rename > 0 then
for j = 1, math.max(#rename, #headerField) do
headerField[j] = rename[j]
-- this is an odd case of where there are certain fields to be kept
if fieldsToKeep == nil or fieldsToKeep[rename[j]] then
outResults[1][rename[j]] = outResults[1][j]
end
-- print("J", j)
outResults[1][j] = nil
end
end end
end end
end end
doubleQuoteEscape = false
-- incrememnt for new line -- determine how line ends
lineNum = lineNum + 1
outResults[lineNum] = {}
fieldNum = 1
fieldStart = i + 1
-- print("fs:", fieldStart)
if (currentChar == CR and nextChar == LF) then if (currentChar == CR and nextChar == LF) then
-- print("CRLF DETECTED") -- print("CRLF DETECTED")
skipChar = 1 skipChar = 1
fieldStart = fieldStart + 1 fieldStart = fieldStart + 1
-- print("fs:", fieldStart) -- print("fs:", fieldStart)
end end
-- increment for new line
lineNum = lineNum + 1
outResults[lineNum] = {}
fieldNum = 1
fieldStart = i + 1 + skipChar
-- print("fs:", fieldStart)
end end
i = i + 1 + skipChar i = i + 1 + skipChar
@ -302,10 +242,11 @@ function ftcsv.parse(inputFile, delimiter, options)
skipChar = 0 skipChar = 0
end end
-- if the line doesn't end happily (with a quote/newline), the last char will be forgotten. -- create last new field
-- this should take care of that. if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep) field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
-- end assignValue()
end
-- clean up last line if it's weird (this happens when there is a CRLF newline at end of file) -- clean up last line if it's weird (this happens when there is a CRLF newline at end of file)
-- doing a count gets it to pick up the oddballs -- doing a count gets it to pick up the oddballs
@ -325,9 +266,101 @@ function ftcsv.parse(inputFile, delimiter, options)
return outResults return outResults
end end
--- Runs the show: validates the options, loads the input and parses it.
-- The input is scanned twice with parseString: a first pass that only reads
-- the header row, and a second pass that builds the output using the
-- (possibly renamed / transformed) header names as keys.
-- @param inputFile path handed to loadFile, or the CSV text itself when
--   options.loadFromString is true
-- @param delimiter string of exactly one character separating the fields
-- @param options optional table; recognised keys:
--   headers (boolean, default true), rename (table),
--   fieldsToKeep (list of field names), loadFromString (boolean, default
--   false) and headerFunc (function applied to every header after rename)
-- @return the value returned by the second parseString pass
function ftcsv.parse(inputFile, delimiter, options)
    -- delimiter MUST be one character.
    -- The type is checked first so that a nil or numeric delimiter fails with
    -- this message instead of a raw "attempt to get length" error.
    assert(type(delimiter) == "string" and #delimiter == 1, "the delimiter must be of string type and exactly one character")

    -- OPTIONS yo
    local header = true
    local rename
    local fieldsToKeep = nil
    local loadFromString = false
    local headerFunc
    if options then
        if options.headers ~= nil then
            assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
            header = options.headers
        end
        if options.rename ~= nil then
            assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
            rename = options.rename
        end
        if options.fieldsToKeep ~= nil then
            assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
            -- turn the list of names into a set for constant-time lookups
            local ofieldsToKeep = options.fieldsToKeep
            fieldsToKeep = {}
            for j = 1, #ofieldsToKeep do
                fieldsToKeep[ofieldsToKeep[j]] = true
            end
            if header == false then
                -- rename stays nil when the option was not supplied; guard it
                -- so the user gets this message rather than an error from next(nil)
                assert(rename ~= nil and next(rename) ~= nil, "ftcsv can only have fieldsToKeep for header-less files when they have been renamed. Please add the 'rename' option and try again.")
            end
        end
        if options.loadFromString ~= nil then
            assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
            loadFromString = options.loadFromString
        end
        if options.headerFunc ~= nil then
            assert(type(options.headerFunc) == "function", "ftcsv only takes a function value for optional parameter 'headerFunc'. You passed in '" .. tostring(options.headerFunc) .. "' of type '" .. type(options.headerFunc) .. "'.")
            headerFunc = options.headerFunc
        end
    end

    -- handle input via string or file!
    local inputString
    if loadFromString then
        inputString = inputFile
    else
        inputString = loadFile(inputFile)
    end
    local inputLength = #inputString

    -- parse through the headers!
    local headerField, i = parseString(inputString, inputLength, delimiter, 0)
    i = i + 1 -- start at the next char

    -- for files where there aren't headers!
    if header == false then
        -- rewind so the second pass starts from the beginning again, and
        -- key the columns by their position
        i = 0
        for j = 1, #headerField do
            headerField[j] = j
        end
    end

    -- rename fields as needed!
    if rename then
        -- basic rename (["a" = "apple"])
        for j = 1, #headerField do
            if rename[headerField[j]] then
                headerField[j] = rename[headerField[j]]
            end
        end
        -- files without headers, but with a rename need to be handled too!
        -- (the loop does not run when rename has no array part)
        for j = 1, #rename do
            headerField[j] = rename[j]
        end
    end

    -- apply some sweet header manipulation
    if headerFunc then
        for j = 1, #headerField do
            headerField[j] = headerFunc(headerField[j])
        end
    end

    -- second pass: build the output keyed by the final header names
    local output = parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
    return output
end
-- a function that delimits " to "", used by the writer -- a function that delimits " to "", used by the writer
local function delimitField(field) local function delimitField(field)
local field = tostring(field) field = tostring(field)
if field:find('"') then if field:find('"') then
return field:gsub('"', '""') return field:gsub('"', '""')
else else

View File

@ -153,4 +153,14 @@ describe("csv features", function()
assert.are.same(expected, actual) assert.are.same(expected, actual)
end) end)
-- headerFunc=string.upper is expected to turn the header names a,b,c into A,B,C
it("should make things uppercase via headerFunc", function()
    local input = "a,b,c\napple,banana,carrot"
    local options = {loadFromString=true, headerFunc=string.upper}
    local expected = {
        { A = "apple", B = "banana", C = "carrot" },
    }
    assert.are.same(expected, ftcsv.parse(input, ",", options))
end)
end) end)