returned headers should now be correct, BOM fix for headerless files and added dynamic_features_spec (#19)

This commit is contained in:
Shakil Thakur 2018-05-29 19:31:28 -05:00 committed by GitHub
parent 2dff1f2764
commit 8bd8fbe065
2 changed files with 484 additions and 2 deletions

View File

@ -295,6 +295,24 @@ local function parseString(inputString, inputLength, delimiter, i, headerField,
return outResults
end
-- determine the real headers as opposed to the header mapping
local function determineRealHeaders(headerField, fieldsToKeep)
local realHeaders = {}
local headerSet = {}
for i = 1, #headerField do
if not headerSet[headerField[i]] then
if fieldsToKeep ~= nil and fieldsToKeep[headerField[i]] then
table.insert(realHeaders, headerField[i])
headerSet[headerField[i]] = true
elseif fieldsToKeep == nil then
table.insert(realHeaders, headerField[i])
headerSet[headerField[i]] = true
end
end
end
return realHeaders
end
-- runs the show!
function ftcsv.parse(inputFile, delimiter, options)
-- delimiter MUST be one character
@ -373,7 +391,7 @@ function ftcsv.parse(inputFile, delimiter, options)
-- for files where there aren't headers!
if header == false then
i = 1
i = startLine
for j = 1, #headerField do
headerField[j] = j
end
@ -404,7 +422,8 @@ function ftcsv.parse(inputFile, delimiter, options)
end
local output = parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
return output, headerField
local realHeaders = determineRealHeaders(headerField, fieldsToKeep)
return output, realHeaders
end
-- a function that delimits " to "", used by the writer

View File

@ -0,0 +1,463 @@
local ftcsv = require "ftcsv"
local BOM = {["NO BOM"] = "", ["BOM"] = string.char(239, 187, 191)}
local newlines = {["LF"] = "\n", ["CRLF"] = "\r\n", ["CR"] = "\r"}
local endlines = {"NONE", "NEWLINE"}
local quotes = {["NO QUOTES"] = "", ["DOUBLE QUOTES"] = '"'}
describe("csv features", function()
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle loading from string (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"a", "b", "c"}
local expected = {}
expected[1] = {}
expected[1].a = "apple"
expected[1].b = "banana"
expected[1].c = "carrot"
local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle renaming fields (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"d", "e", "f"}
local expected = {}
expected[1] = {}
expected[1].d = "apple"
expected[1].e = "banana"
expected[1].f = "carrot"
local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, rename={["a"] = "d", ["b"] = "e", ["c"] = "f"}}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle renaming fields to the same out value (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"d", "e"}
local expected = {}
expected[1] = {}
expected[1].d = "apple"
expected[1].e = "carrot"
local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, rename={["a"] = "d", ["b"] = "e", ["c"] = "e"}}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle keeping only a few fields (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"a", "b"}
local expected = {}
expected[1] = {}
expected[1].a = "apple"
expected[1].b = "banana"
local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, fieldsToKeep={"a", "b"}}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle only keeping a few fields with a rename to an existing field (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"a", "b"}
local expected = {}
expected[1] = {}
expected[1].a = "apple"
expected[1].b = "carrot"
local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, rename={["c"] = "b"}, fieldsToKeep={"a","b"}}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle only keeping a few fields with a rename to a new field (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"a", "f"}
local expected = {}
expected[1] = {}
expected[1].a = "apple"
expected[1].f = "carrot"
local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, rename={["c"] = "f"}, fieldsToKeep={"a","f"}}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should apply a function via headerFunc (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"A", "B", "C"}
local expected = {}
expected[1] = {}
expected[1].A = "apple"
expected[1].B = "banana"
expected[1].C = "carrot"
local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, headerFunc=string.upper}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should apply a function via headerFunc with rename and fieldsToKeep (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"A", "F"}
local expected = {}
expected[1] = {}
expected[1].A = "apple"
expected[1].F = "carrot"
local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, rename={["c"] = "f"}, fieldsToKeep={"A","F"}, headerFunc=string.upper}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for _, endline in ipairs(endlines) do
local name = "should handle escaped doublequotes (%s + %s) EOF: %s"
it(name:format(bom, newline, endline), function()
local expectedHeaders = {"a", "b", "c"}
local expected = {}
expected[1] = {}
expected[1].a = '"apple"'
expected[1].b = '"banana"'
expected[1].c = '"carrot"'
local defaultString = '%s"a","b","c"%s"""apple""","""banana""","""carrot"""%s'
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
-- HEADERLESS TESTS START HERE
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle files without headers (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {1, 2, 3}
local expected = {}
expected[1] = {}
expected[1][1] = "apple"
expected[1][2] = "banana"
expected[1][3] = "carrot"
expected[2] = {}
expected[2][1] = "diamond"
expected[2][2] = "emerald"
expected[2][3] = "pearl"
local defaultString = "%s`apple`,`banana`,`carrot`%s`diamond`,`emerald`,`pearl`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, headers=false}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle files without headers and with one row (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {1, 2, 3}
local expected = {}
expected[1] = {}
expected[1][1] = "apple"
expected[1][2] = "banana"
expected[1][3] = "carrot"
local defaultString = "%s`apple`,`banana`,`carrot`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, "")
else
defaultString = defaultString:format(i, j)
end
local options = {loadFromString=true, headers=false}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle renaming fields from files without headers (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"a", "b", "c"}
local expected = {}
expected[1] = {}
expected[1].a = "apple"
expected[1].b = "banana"
expected[1].c = "carrot"
expected[2] = {}
expected[2].a = "diamond"
expected[2].b = "emerald"
expected[2].c = "pearl"
local defaultString = "%s`apple`,`banana`,`carrot`%s`diamond`,`emerald`,`pearl`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, headers=false, rename={"a","b","c"}}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle renaming fields from files without headers and only keeping a few fields (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"a", "b"}
local expected = {}
expected[1] = {}
expected[1].a = "apple"
expected[1].b = "banana"
expected[2] = {}
expected[2].a = "diamond"
expected[2].b = "emerald"
local defaultString = "%s`apple`,`banana`,`carrot`%s`diamond`,`emerald`,`pearl`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, headers=false, rename={"a","b","c"}, fieldsToKeep={"a","b"}}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
for bom, i in pairs(BOM) do
for newline, j in pairs(newlines) do
for quote, k in pairs(quotes) do
for _, endline in ipairs(endlines) do
local name = "should handle if the number of renames doesn't equal the number of fields (%s + %s + %s) EOF: %s"
it(name:format(bom, newline, quote, endline), function()
local expectedHeaders = {"a", "b"}
local expected = {}
expected[1] = {}
expected[1].a = "apple"
expected[1].b = "banana"
expected[2] = {}
expected[2].a = "diamond"
expected[2].b = "emerald"
local defaultString = "%s`apple`,`banana`,`carrot`%s`diamond`,`emerald`,`pearl`%s"
defaultString = defaultString:gsub("`", k)
if endline == "NONE" then
defaultString = defaultString:format(i, j, "")
else
defaultString = defaultString:format(i, j, j)
end
local options = {loadFromString=true, headers=false, rename={"a","b"}, fieldsToKeep={"a","b"}}
local actual, actualHeaders = ftcsv.parse(defaultString, ",", options)
assert.are.same(expected, actual)
assert.are.same(expectedHeaders, actualHeaders)
end)
end
end
end
end
end)