adding ftcsv and test scripts

This commit is contained in:
FourierTransformer 2016-03-09 06:37:25 -06:00
commit 572997ae12
32 changed files with 537 additions and 0 deletions

270
ftcsv.lua Normal file
View File

@ -0,0 +1,270 @@
local ftcsv = {}
--- Scan forward from inside a quoted field to the quote that closes it.
-- A doubled quote ("") is an escaped literal quote: it is skipped, and the
-- escape flag is raised so the caller knows the field needs un-escaping.
-- @param i index of the first character to inspect (inside the quoted field)
-- @param inputLength length of inputString in bytes
-- @param inputString the full CSV text
-- @param quote byte value of the quote character
-- @param doubleQuoteEscape escape flag carried in from the caller
-- @return index of the character just before the closing quote
-- @return doubleQuoteEscape, set to true if a doubled quote was seen
-- Raises an error when the input ends before a closing quote is found.
local function findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
    local scanStart = i
    while i <= inputLength do
        if string.byte(inputString, i) ~= quote then
            i = i + 1
        elseif string.byte(inputString, i + 1) == quote then
            -- "" inside a quoted field stands for one literal quote
            doubleQuoteEscape = true
            i = i + 2
        else
            -- a lone quote closes the field
            return i - 1, doubleQuoteEscape
        end
    end
    -- Falling off the end used to return nothing, which made the caller fail
    -- later with an unrelated "arithmetic on a nil value" error.
    error("ftcsv: no closing quote found for the quoted field scanned from position " .. scanStart)
end
--- Store the field that ends just before index i into the current row.
-- @param inputString the full CSV text
-- @param quote byte value of the quote character
-- @param fieldStart index of the first character of the field
-- @param i index of the delimiter (or one past the end of input) that ends the field
-- @param line row table receiving the value
-- @param fieldNum key under which the value is stored in line
-- @param doubleQuoteEscape truthy when the field contains doubled quotes to collapse
-- @param fieldsToKeep nil to keep every column, otherwise a lookup table of keys to keep
-- @return false after collapsing doubled quotes; nothing otherwise
local function createNewField(inputString, quote, fieldStart, i, line, fieldNum, doubleQuoteEscape, fieldsToKeep)
    -- columns filtered out by fieldsToKeep are never stored
    if fieldsToKeep ~= nil and not fieldsToKeep[fieldNum] then
        return
    end

    -- a quote right before the delimiter is the closing quote, not field content
    local lastIndex = i - 1
    if string.byte(inputString, lastIndex) == quote then
        lastIndex = i - 2
    end
    local value = string.sub(inputString, fieldStart, lastIndex)

    if not doubleQuoteEscape then
        line[fieldNum] = value
        return
    end

    -- turn every escaped "" back into a single "; the parentheses drop gsub's count
    line[fieldNum] = (value:gsub('""', '"'))
    return false
end
--- Turn the first parsed line into the list of header names.
-- @param line list of raw header strings read from the input
-- @param rename table mapping a raw header to the name that replaces it
-- @param fieldsToKeep nil, or a list of header names whose columns are kept
-- @return list of header names with renames applied
-- @return 0, the line counter value the caller restarts from
-- @return true, signalling that the headers are now set
-- @return nil when fieldsToKeep is nil, otherwise a new lookup set { name = true }
local function createHeaders(line, rename, fieldsToKeep)
    local headers = {}
    for i = 1, #line do
        local rawName = line[i]
        headers[i] = rename[rawName] or rawName
    end

    -- Build the lookup set in a fresh table: writing the keys into fieldsToKeep
    -- itself would modify the list the caller passed in, and would leave its
    -- positional entries (1, 2, ...) in the set as bogus truthy keys.
    local keepSet = nil
    if fieldsToKeep ~= nil then
        keepSet = {}
        for i = 1, #fieldsToKeep do
            keepSet[fieldsToKeep[i]] = true
        end
    end

    return headers, 0, true, keepSet
end
--- Decode a CSV string into a list of rows.
--
-- Each row is a table keyed by header name (after `rename` is applied), or by
-- column index when `options.headers` is false. Every stored value is a string.
--
-- @param inputString the CSV text to parse
-- @param separator the field separator; must be a one-character string
-- @param options optional table with any of:
--   headers      (boolean, default true) treat the first line as the header row
--   rename       (table) maps a header found in the input to the key used instead
--   fieldsToKeep (list) header names, as they are after renaming, whose columns
--                are kept; only consulted when the first line is a header row
-- @return list of row tables; empty when the input holds no data rows
-- Raises an error for an invalid separator or an option of the wrong type.
function ftcsv.decode(inputString, separator, options)
    -- each line in outResults holds another table
    local outResults = {}
    outResults[1] = {}

    -- The type is checked first so a non-string separator reaches this message
    -- instead of failing on the length operator.
    if type(separator) ~= "string" or #separator ~= 1 then
        error("the separator must be of string type and exactly one character")
    end
    local separatorByte = string.byte(separator)

    -- option handling; offending values go through tostring() so that tables
    -- and booleans cannot break the error message itself
    local header = true
    local rename = {}
    local fieldsToKeep = nil
    local ofieldsToKeep = nil
    if options then
        if options.headers ~= nil then
            if type(options.headers) ~= "boolean" then
                error("ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
            end
            header = options.headers
        end
        if options.rename ~= nil then
            if type(options.rename) ~= "table" then
                error("ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
            end
            rename = options.rename
        end
        if options.fieldsToKeep ~= nil then
            if type(options.fieldsToKeep) ~= "table" then
                error("ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
            end
            ofieldsToKeep = options.fieldsToKeep
        end
    end

    local CR = string.byte("\r")
    local LF = string.byte("\n")
    local quote = string.byte("\"")
    local doubleQuoteEscape = false
    local fieldStart = 1
    local fieldNum = 1
    local lineNum = 1
    local skipChar = 0
    local inputLength = #inputString
    -- maps a column position to the key its values are stored under
    local headerField = {}
    local headerSet = false
    local i = 1

    -- the scan looks at two bytes at a time: the current one and the one after it
    local currentChar = string.byte(inputString, i)
    local nextChar
    while i <= inputLength do
        nextChar = string.byte(inputString, i + 1)

        if currentChar == quote and nextChar == quote then
            -- "" : an empty quoted string, skip both quotes
            skipChar = 1
            fieldStart = i + 2

        elseif currentChar == quote and nextChar ~= quote then
            -- opening quote: jump straight to the end of the quoted section
            fieldStart = i + 1
            i, doubleQuoteEscape = findClosingQuote(i + 1, inputLength, inputString, quote, doubleQuoteEscape)
            -- i is now the last character inside the quotes; also skip the closing quote
            skipChar = 1

        elseif currentChar == separatorByte then
            -- until headers are known, columns of the first line are keyed by position
            if not headerSet and lineNum == 1 then
                headerField[fieldNum] = fieldNum
            end
            doubleQuoteEscape = createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep)
            fieldNum = fieldNum + 1
            fieldStart = i + 1

        elseif (currentChar == CR and nextChar == LF) or currentChar == LF then
            if not headerSet and lineNum == 1 then
                headerField[fieldNum] = fieldNum
            end
            doubleQuoteEscape = createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep)

            -- the first line is the header row: switch to header-name keys and
            -- restart the line counter so the header row itself is overwritten
            if header and lineNum == 1 and not headerSet then
                headerField, lineNum, headerSet, fieldsToKeep = createHeaders(outResults[lineNum], rename, ofieldsToKeep)
            end

            lineNum = lineNum + 1
            outResults[lineNum] = {}
            fieldNum = 1
            fieldStart = i + 1
            if currentChar == CR and nextChar == LF then
                -- CRLF is two bytes wide
                skipChar = 1
                fieldStart = fieldStart + 1
            end
        end

        i = i + 1 + skipChar
        if skipChar > 0 then
            currentChar = string.byte(inputString, i)
        else
            currentChar = nextChar
        end
        skipChar = 0
    end

    -- A header row that was never terminated by a newline (or an empty input)
    -- means there is nothing but the header: no data rows to return.
    if header and not headerSet then
        return {}
    end

    -- Single-line input never passes through the newline branch, so the key for
    -- its last column has not been registered yet.
    if not headerSet and lineNum == 1 then
        headerField[fieldNum] = fieldNum
    end

    -- the field in progress when the input ran out still has to be stored
    createNewField(inputString, quote, fieldStart, i, outResults[lineNum], headerField[fieldNum], doubleQuoteEscape, fieldsToKeep)

    -- Decide whether the last row is real. Counting the stored keys is not
    -- reliable (fieldsToKeep stores fewer keys than there are columns), so the
    -- decision is based on how the input ended:
    --   * empty input or a trailing newline leaves a row that was only opened;
    --   * a row that stopped before the last column is incomplete.
    local endsWithNewline = string.byte(inputString, inputLength) == LF
    if inputLength == 0 or endsWithNewline or fieldNum ~= #headerField then
        outResults[lineNum] = nil
    end

    return outResults
end
--- Render one value as a CSV field, quoting it when necessary.
-- Non-string values (numbers, booleans, ...) are converted with tostring so
-- they can be encoded instead of failing on the string method calls below.
-- @param field the value to render; must not be nil
-- @return the field text: wrapped in quotes when it contains a quote (each one
--         doubled), a space, a comma or a newline, or when it is empty;
--         otherwise returned unchanged
-- Raises an error when field is nil.
local function delimitField(field)
    if field == nil then
        error("ftcsv: cannot encode a nil field")
    end
    field = tostring(field)

    if field:find('"') then
        -- the local keeps only gsub's first result (the text, not the match count)
        local escaped = field:gsub('"', '""')
        return '"' .. escaped .. '"'
    elseif field:find(" ") or field:find(",") or field:find("\n") then
        return '"' .. field .. '"'
    elseif field == "" then
        return '""'
    else
        return field
    end
end
--- Encode a list of row tables as CSV text.
--
-- The first output line is the header row; rows are joined with CRLF and no
-- trailing newline is added.
--
-- @param inputTable list of rows, each a table keyed by header name
-- @param separator the field separator; must be a one-character string
-- @param headers optional list of keys giving the columns and their order;
--        when nil, the keys of the first row are used, sorted
-- @return the CSV text
-- Raises an error for an invalid separator, or when a row has no value for one
-- of the columns.
function ftcsv.encode(inputTable, separator, headers)
    -- The type is checked first so a non-string separator reaches this message
    -- instead of failing on the length operator.
    if type(separator) ~= "string" or #separator ~= 1 then
        error("the separator must be of string type and exactly one character")
    end

    -- Render one value. delimitField is called with the value alone, so a field
    -- it returned unchanged may still contain a non-comma separator; such a
    -- field is quoted here so it cannot be split when read back.
    local function renderField(value)
        local cell = delimitField(value)
        local raw = tostring(value)
        if separator ~= "," and cell == raw and raw:find(separator, 1, true) then
            cell = '"' .. raw .. '"'
        end
        return cell
    end

    local output = {}

    -- take the columns from the first row when none are given; sorted because
    -- pairs() has no defined order
    local columns = headers
    if columns == nil then
        columns = {}
        -- an empty inputTable simply yields no columns
        for key in pairs(inputTable[1] or {}) do
            columns[#columns + 1] = key
        end
        table.sort(columns)
    end

    -- header row; the line buffer is reused for every data row
    local line = {}
    for i, header in ipairs(columns) do
        line[i] = renderField(header)
    end
    local expectedLength = #line
    output[1] = table.concat(line, separator)

    for i, fields in ipairs(inputTable) do
        -- count only the values that are actually present, so a row missing a
        -- column is reported below instead of failing while being rendered
        local numFields = 0
        for j = 1, #columns do
            local field = fields[columns[j]]
            if field ~= nil then
                line[j] = renderField(field)
                numFields = numFields + 1
            end
        end
        if numFields ~= expectedLength then
            error("All lines should have the same length. The line at row " .. i .. " is of length " .. numFields .. " instead of " .. expectedLength)
        end
        output[i + 1] = table.concat(line, separator)
    end

    return table.concat(output, "\r\n")
end
return ftcsv

View File

@ -0,0 +1,2 @@
first,last,address,city,zip
John,Doe,120 any st.,"Anytown, WW",08123
1 first last address city zip
2 John Doe 120 any st. Anytown, WW 08123

View File

@ -0,0 +1,7 @@
Year,Make,Model,Description,Price
1997,Ford,E350,"ac, abs, moon",3000.00
1999,Chevy,"Venture ""Extended Edition""","",4900.00
1996,Jeep,Grand Cherokee,"MUST SELL!
air, moon roof, loaded",4799.00
1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
,,"Venture ""Extended Edition""","",4900.00
1 Year Make Model Description Price
2 1997 Ford E350 ac, abs, moon 3000.00
3 1999 Chevy Venture "Extended Edition" 4900.00
4 1996 Jeep Grand Cherokee MUST SELL! air, moon roof, loaded 4799.00
5 1999 Chevy Venture "Extended Edition, Very Large" 5000.00
6 Venture "Extended Edition" 4900.00

3
spec/csvs/empty.csv Normal file
View File

@ -0,0 +1,3 @@
a,b,c
1,"",""
2,3,4
1 a b c
2 1
3 2 3 4

3
spec/csvs/empty_crlf.csv Normal file
View File

@ -0,0 +1,3 @@
a,b,c
1,"",""
2,3,4
1 a b c
2 1
3 2 3 4

View File

@ -0,0 +1,2 @@
a,b,c
1,"",""
1 a b c
2 1

View File

@ -0,0 +1,2 @@
a,b,c
1,,
1 a b c
2 1

View File

@ -0,0 +1,3 @@
a,b
1,"ha ""ha"" ha"
3,4
1 a b
2 1 ha "ha" ha
3 3 4

2
spec/csvs/json.csv Normal file
View File

@ -0,0 +1,2 @@
key,val
1,"{""type"": ""Point"", ""coordinates"": [102.0, 0.5]}"
1 key val
2 1 {"type": "Point", "coordinates": [102.0, 0.5]}

View File

@ -0,0 +1,2 @@
key,val
1,"{""type"": ""Point"", ""coordinates"": [102.0, 0.5]}"
1 key val
2 1 {"type": "Point", "coordinates": [102.0, 0.5]}

5
spec/csvs/newlines.csv Normal file
View File

@ -0,0 +1,5 @@
a,b,c
1,2,3
"Once upon
a time",5,6
7,8,9
1 a b c
2 1 2 3
3 Once upon a time 5 6
4 7 8 9

View File

@ -0,0 +1,5 @@
a,b,c
1,2,3
"Once upon
a time",5,6
7,8,9
1 a b c
2 1 2 3
3 Once upon a time 5 6
4 7 8 9

View File

@ -0,0 +1,5 @@
a,b
1,"ha
""ha""
ha"
3,4
1 a b
2 1 ha "ha" ha
3 3 4

2
spec/csvs/simple.csv Normal file
View File

@ -0,0 +1,2 @@
a,b,c
1,2,3
1 a b c
2 1 2 3

View File

@ -0,0 +1,2 @@
a,b,c
1,2,3
1 a b c
2 1 2 3

3
spec/csvs/utf8.csv Normal file
View File

@ -0,0 +1,3 @@
a,b,c
1,2,3
4,5,ʤ
1 a b c
2 1 2 3
3 4 5 ʤ

View File

@ -0,0 +1,9 @@
[
{
"first": "John",
"last": "Doe",
"address": "120 any st.",
"city": "Anytown, WW",
"zip": "08123"
}
]

View File

@ -0,0 +1,37 @@
[
{
"Make": "Ford",
"Year": "1997",
"Price": "3000.00",
"Model": "E350",
"Description": "ac, abs, moon"
},
{
"Make": "Chevy",
"Year": "1999",
"Price": "4900.00",
"Model": "Venture \"Extended Edition\"",
"Description": ""
},
{
"Make": "Jeep",
"Year": "1996",
"Price": "4799.00",
"Model": "Grand Cherokee",
"Description": "MUST SELL!\nair, moon roof, loaded"
},
{
"Make": "Chevy",
"Year": "1999",
"Price": "5000.00",
"Model": "Venture \"Extended Edition, Very Large\"",
"Description": ""
},
{
"Make": "",
"Year": "",
"Price": "4900.00",
"Model": "Venture \"Extended Edition\"",
"Description": ""
}
]

4
spec/json/empty.json Normal file
View File

@ -0,0 +1,4 @@
[
{ "a": "1", "b": "", "c": "" },
{ "a": "2", "b": "3", "c": "4" }
]

View File

@ -0,0 +1,4 @@
[
{ "a": "1", "b": "", "c": "" },
{ "a": "2", "b": "3", "c": "4" }
]

View File

@ -0,0 +1,3 @@
[
{ "a": "1", "b": "", "c": "" }
]

View File

@ -0,0 +1,3 @@
[
{ "a": "1", "b": "", "c": "" }
]

View File

@ -0,0 +1,10 @@
[
{
"a": "1",
"b": "ha \"ha\" ha"
},
{
"a": "3",
"b": "4"
}
]

6
spec/json/json.json Normal file
View File

@ -0,0 +1,6 @@
[
{
"key": "1",
"val": "{\"type\": \"Point\", \"coordinates\": [102.0, 0.5]}"
}
]

View File

@ -0,0 +1,6 @@
[
{
"key": "1",
"val": "{\"type\": \"Point\", \"coordinates\": [102.0, 0.5]}"
}
]

17
spec/json/newlines.json Normal file
View File

@ -0,0 +1,17 @@
[
{
"a": "1",
"b": "2",
"c": "3"
},
{
"a": "Once upon \na time",
"b": "5",
"c": "6"
},
{
"a": "7",
"b": "8",
"c": "9"
}
]

View File

@ -0,0 +1,17 @@
[
{
"a": "1",
"b": "2",
"c": "3"
},
{
"a": "Once upon \r\na time",
"b": "5",
"c": "6"
},
{
"a": "7",
"b": "8",
"c": "9"
}
]

View File

@ -0,0 +1,10 @@
[
{
"a": "1",
"b": "ha \n\"ha\" \nha"
},
{
"a": "3",
"b": "4"
}
]

7
spec/json/simple.json Normal file
View File

@ -0,0 +1,7 @@
[
{
"a": "1",
"b": "2",
"c": "3"
}
]

View File

@ -0,0 +1,7 @@
[
{
"a": "1",
"b": "2",
"c": "3"
}
]

12
spec/json/utf8.json Normal file
View File

@ -0,0 +1,12 @@
[
{
"a": "1",
"b": "2",
"c": "3"
},
{
"a": "4",
"b": "5",
"c": "ʤ"
}
]

67
spec/test_spec.lua Normal file
View File

@ -0,0 +1,67 @@
local cjson = require("cjson")
local ftcsv = require('ftcsv')
-- local csv = require('csv')
-- local staecsv = require('state-csv')
--- Read a whole file into a string.
-- @param textFile path of the file to read
-- @return the complete file contents
-- Raises an error when the file cannot be opened.
local function loadFile(textFile)
    local handle = io.open(textFile, "r")
    if not handle then
        error("File not found at " .. textFile)
    end
    local contents = handle:read("*all")
    handle:close()
    return contents
end
-- Base names of the fixtures exercised by the specs below. Each name is loaded
-- as spec/csvs/<name>.csv and spec/json/<name>.json.
local files = {
"comma_in_quotes",
"correctness",
"empty",
"empty_no_newline",
"empty_no_quotes",
"empty_crlf",
"escaped_quotes",
"json",
"json_no_newline",
"newlines",
"newlines_crlf",
"quotes_and_newlines",
"simple",
"simple_crlf",
"utf8"
}
-- Decoding: every CSV fixture must parse into the structure stored in the
-- JSON fixture of the same name.
describe("csv decode", function()
    for _, name in ipairs(files) do
        it("should handle " .. name, function()
            local csvText = loadFile("spec/csvs/" .. name .. ".csv")
            local expected = cjson.decode(loadFile("spec/json/" .. name .. ".json"))
            local actual = ftcsv.decode(csvText, ",")
            assert.are.same(expected, actual)
        end)
    end
end)
-- Encoding: the data from each JSON fixture must survive an encode followed by
-- a decode unchanged.
describe("csv encode", function()
    for _, name in ipairs(files) do
        it("should handle " .. name, function()
            -- the CSV fixture is only loaded (loadFile raises if it is missing);
            -- its contents are not compared
            loadFile("spec/csvs/" .. name .. ".csv")
            local expected = cjson.decode(loadFile("spec/json/" .. name .. ".json"))
            local roundTripped = ftcsv.decode(ftcsv.encode(expected, ","), ",")
            assert.are.same(expected, roundTripped)
        end)
    end
end)