mirror of
https://github.com/FourierTransformer/ftcsv.git
synced 2024-11-19 19:54:23 +00:00
returned headers should now be correct, BOM fix for headerless files and added dynamic_features_spec (#19)
This commit is contained in:
parent
2dff1f2764
commit
8bd8fbe065
23
ftcsv.lua
23
ftcsv.lua
@ -295,6 +295,24 @@ local function parseString(inputString, inputLength, delimiter, i, headerField,
|
||||
return outResults
|
||||
end
|
||||
|
||||
-- Determine the real headers, as opposed to the per-column header mapping.
--
-- headerField:  sequence of header values, one entry per column; the same
--               value may occur more than once.
-- fieldsToKeep: nil to keep every header, otherwise a table that is indexed
--               by header value; a header is kept only when its entry is
--               truthy.
--
-- Returns a new sequence holding each kept header exactly once, in the order
-- of its first occurrence in headerField.
local function determineRealHeaders(headerField, fieldsToKeep)
    local keepEverything = (fieldsToKeep == nil)
    local alreadyListed = {}
    local realHeaders = {}

    for column = 1, #headerField do
        local header = headerField[column]
        -- The duplicate check comes first so that a header which was already
        -- emitted never triggers a second lookup in fieldsToKeep.
        if not alreadyListed[header] and (keepEverything or fieldsToKeep[header]) then
            alreadyListed[header] = true
            realHeaders[#realHeaders + 1] = header
        end
    end

    return realHeaders
end
|
||||
|
||||
-- runs the show!
|
||||
function ftcsv.parse(inputFile, delimiter, options)
|
||||
-- delimiter MUST be one character
|
||||
@ -373,7 +391,7 @@ function ftcsv.parse(inputFile, delimiter, options)
|
||||
|
||||
-- for files where there aren't headers!
|
||||
if header == false then
|
||||
i = 1
|
||||
i = startLine
|
||||
for j = 1, #headerField do
|
||||
headerField[j] = j
|
||||
end
|
||||
@ -404,7 +422,8 @@ function ftcsv.parse(inputFile, delimiter, options)
|
||||
end
|
||||
|
||||
local output = parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
|
||||
return output, headerField
|
||||
local realHeaders = determineRealHeaders(headerField, fieldsToKeep)
|
||||
return output, realHeaders
|
||||
end
|
||||
|
||||
-- a function that escapes " as "", used by the writer
|
||||
|
463
spec/dynamic_features_spec.lua
Normal file
463
spec/dynamic_features_spec.lua
Normal file
@ -0,0 +1,463 @@
|
||||
local ftcsv = require "ftcsv"

-- Axes of the test matrix. For the keyed tables the key is the label that
-- ends up in the test name and the value is the literal text used to build
-- the CSV input.
local BOM = {["NO BOM"] = "", ["BOM"] = string.char(239, 187, 191)}
local newlines = {["LF"] = "\n", ["CRLF"] = "\r\n", ["CR"] = "\r"}
local endlines = {"NONE", "NEWLINE"}
local quotes = {["NO QUOTES"] = "", ["DOUBLE QUOTES"] = '"'}

-- Input rows shared by most cases. A backtick marks a position where the
-- quote character of the current matrix cell is substituted.
local HEADER_ROW = "`a`,`b`,`c`"
local FRUIT_ROW = "`apple`,`banana`,`carrot`"
local GEM_ROW = "`diamond`,`emerald`,`pearl`"

-- Every entry describes one scenario that is run against the whole matrix:
--   name            format string for the test title
--   rows            CSV lines, without line terminators
--   literalQuotes   true when the rows already contain their final quoting;
--                   the quote axis is then skipped and the title takes three
--                   placeholders instead of four
--   options         function returning a fresh options table for each parse
--   expected        rows expected back from ftcsv.parse
--   expectedHeaders headers expected back from ftcsv.parse
local cases = {
    {
        name = "should handle loading from string (%s + %s + %s) EOF: %s",
        rows = {HEADER_ROW, FRUIT_ROW},
        options = function() return {loadFromString=true} end,
        expected = {{a = "apple", b = "banana", c = "carrot"}},
        expectedHeaders = {"a", "b", "c"},
    },
    {
        name = "should handle renaming fields (%s + %s + %s) EOF: %s",
        rows = {HEADER_ROW, FRUIT_ROW},
        options = function()
            return {loadFromString=true, rename={["a"] = "d", ["b"] = "e", ["c"] = "f"}}
        end,
        expected = {{d = "apple", e = "banana", f = "carrot"}},
        expectedHeaders = {"d", "e", "f"},
    },
    {
        name = "should handle renaming fields to the same out value (%s + %s + %s) EOF: %s",
        rows = {HEADER_ROW, FRUIT_ROW},
        options = function()
            return {loadFromString=true, rename={["a"] = "d", ["b"] = "e", ["c"] = "e"}}
        end,
        expected = {{d = "apple", e = "carrot"}},
        expectedHeaders = {"d", "e"},
    },
    {
        name = "should handle keeping only a few fields (%s + %s + %s) EOF: %s",
        rows = {HEADER_ROW, FRUIT_ROW},
        options = function()
            return {loadFromString=true, fieldsToKeep={"a", "b"}}
        end,
        expected = {{a = "apple", b = "banana"}},
        expectedHeaders = {"a", "b"},
    },
    {
        name = "should handle only keeping a few fields with a rename to an existing field (%s + %s + %s) EOF: %s",
        rows = {HEADER_ROW, FRUIT_ROW},
        options = function()
            return {loadFromString=true, rename={["c"] = "b"}, fieldsToKeep={"a","b"}}
        end,
        expected = {{a = "apple", b = "carrot"}},
        expectedHeaders = {"a", "b"},
    },
    {
        name = "should handle only keeping a few fields with a rename to a new field (%s + %s + %s) EOF: %s",
        rows = {HEADER_ROW, FRUIT_ROW},
        options = function()
            return {loadFromString=true, rename={["c"] = "f"}, fieldsToKeep={"a","f"}}
        end,
        expected = {{a = "apple", f = "carrot"}},
        expectedHeaders = {"a", "f"},
    },
    {
        name = "should apply a function via headerFunc (%s + %s + %s) EOF: %s",
        rows = {HEADER_ROW, FRUIT_ROW},
        options = function()
            return {loadFromString=true, headerFunc=string.upper}
        end,
        expected = {{A = "apple", B = "banana", C = "carrot"}},
        expectedHeaders = {"A", "B", "C"},
    },
    {
        name = "should apply a function via headerFunc with rename and fieldsToKeep (%s + %s + %s) EOF: %s",
        rows = {HEADER_ROW, FRUIT_ROW},
        options = function()
            return {loadFromString=true, rename={["c"] = "f"}, fieldsToKeep={"A","F"}, headerFunc=string.upper}
        end,
        expected = {{A = "apple", F = "carrot"}},
        expectedHeaders = {"A", "F"},
    },
    {
        name = "should handle escaped doublequotes (%s + %s) EOF: %s",
        rows = {'"a","b","c"', '"""apple""","""banana""","""carrot"""'},
        literalQuotes = true,
        options = function() return {loadFromString=true} end,
        expected = {{a = '"apple"', b = '"banana"', c = '"carrot"'}},
        expectedHeaders = {"a", "b", "c"},
    },

    -- HEADERLESS TESTS START HERE

    {
        name = "should handle files without headers (%s + %s + %s) EOF: %s",
        rows = {FRUIT_ROW, GEM_ROW},
        options = function() return {loadFromString=true, headers=false} end,
        expected = {
            {"apple", "banana", "carrot"},
            {"diamond", "emerald", "pearl"},
        },
        expectedHeaders = {1, 2, 3},
    },
    {
        name = "should handle files without headers and with one row (%s + %s + %s) EOF: %s",
        rows = {FRUIT_ROW},
        options = function() return {loadFromString=true, headers=false} end,
        expected = {{"apple", "banana", "carrot"}},
        expectedHeaders = {1, 2, 3},
    },
    {
        name = "should handle renaming fields from files without headers (%s + %s + %s) EOF: %s",
        rows = {FRUIT_ROW, GEM_ROW},
        options = function()
            return {loadFromString=true, headers=false, rename={"a","b","c"}}
        end,
        expected = {
            {a = "apple", b = "banana", c = "carrot"},
            {a = "diamond", b = "emerald", c = "pearl"},
        },
        expectedHeaders = {"a", "b", "c"},
    },
    {
        name = "should handle renaming fields from files without headers and only keeping a few fields (%s + %s + %s) EOF: %s",
        rows = {FRUIT_ROW, GEM_ROW},
        options = function()
            return {loadFromString=true, headers=false, rename={"a","b","c"}, fieldsToKeep={"a","b"}}
        end,
        expected = {
            {a = "apple", b = "banana"},
            {a = "diamond", b = "emerald"},
        },
        expectedHeaders = {"a", "b"},
    },
    {
        name = "should handle if the number of renames doesn't equal the number of fields (%s + %s + %s) EOF: %s",
        rows = {FRUIT_ROW, GEM_ROW},
        options = function()
            return {loadFromString=true, headers=false, rename={"a","b"}, fieldsToKeep={"a","b"}}
        end,
        expected = {
            {a = "apple", b = "banana"},
            {a = "diamond", b = "emerald"},
        },
        expectedHeaders = {"a", "b"},
    },
}

-- Assemble the CSV text for one matrix cell: BOM prefix, rows joined by the
-- newline under test, and that same newline once more at the end unless the
-- endline mode is "NONE". When `quote` is nil the rows are used verbatim.
local function buildInput(rows, bom, newline, quote, endline)
    local body = table.concat(rows, newline)
    if quote ~= nil then
        -- gsub also returns a match count; the single assignment drops it
        body = body:gsub("`", quote)
    end
    local trailer = ""
    if endline ~= "NONE" then
        trailer = newline
    end
    return bom .. body .. trailer
end

-- Register a single busted test that parses the generated input and checks
-- both values returned by ftcsv.parse.
local function defineTest(title, case, bom, newline, quote, endline)
    it(title, function()
        local input = buildInput(case.rows, bom, newline, quote, endline)
        local actual, actualHeaders = ftcsv.parse(input, ",", case.options())
        assert.are.same(case.expected, actual)
        assert.are.same(case.expectedHeaders, actualHeaders)
    end)
end

describe("csv features", function()
    for _, case in ipairs(cases) do
        for bomName, bom in pairs(BOM) do
            for newlineName, newline in pairs(newlines) do
                if case.literalQuotes then
                    for _, endline in ipairs(endlines) do
                        local title = case.name:format(bomName, newlineName, endline)
                        defineTest(title, case, bom, newline, nil, endline)
                    end
                else
                    for quoteName, quote in pairs(quotes) do
                        for _, endline in ipairs(endlines) do
                            local title = case.name:format(bomName, newlineName, quoteName, endline)
                            defineTest(title, case, bom, newline, quote, endline)
                        end
                    end
                end
            end
        end
    end
end)
|
Loading…
Reference in New Issue
Block a user