will now strip out BOM

This commit is contained in:
FourierTransformer 2017-11-30 22:46:10 -06:00
parent 6c820e03a1
commit d2ddda79f7
7 changed files with 36 additions and 2 deletions

View File

@ -3,7 +3,7 @@
ftcsv, a fairly fast csv library written in pure Lua. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, and 5.3 ftcsv, a fairly fast csv library written in pure Lua. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, and 5.3
It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles `\n` (LF), `\r` (CR), and `\r\n` (CRLF) line endings (i.e., it should work with Unix, Mac OS 9, and Windows line endings) and has UTF-8 support. It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles `\n` (LF), `\r` (CR), and `\r\n` (CRLF) line endings (i.e., it should work with Unix, Mac OS 9, and Windows line endings), strips out the UTF-8 BOM (if it exists), and has UTF-8 support.

View File

@ -347,7 +347,13 @@ function ftcsv.parse(inputFile, delimiter, options)
end end
-- parse through the headers! -- parse through the headers!
local headerField, i = parseString(inputString, inputLength, delimiter, 1) local startLine = 1
-- check for BOM
if string.byte(inputString, 1) == 239 and string.byte(inputString, 2) == 187 and string.byte(inputString, 3) == 191 then
startLine = 4
end
local headerField, i = parseString(inputString, inputLength, delimiter, startLine)
i = i + 1 -- start at the next char i = i + 1 -- start at the next char
-- make sure a header isn't empty -- make sure a header isn't empty

1
spec/csvs/bom-os9.csv Normal file
View File

@ -0,0 +1 @@
a,b,c 1,2,3 4,5,ʤ
1 a b c 1 2 3 4 5 ʤ

1
spec/csvs/os9.csv Normal file
View File

@ -0,0 +1 @@
a,b,c 1,2,3 4,5,ʤ
1 a b c 1 2 3 4 5 ʤ

12
spec/json/bom-os9.json Normal file
View File

@ -0,0 +1,12 @@
[
{
"a": "1",
"b": "2",
"c": "3"
},
{
"a": "4",
"b": "5",
"c": "ʤ"
}
]

12
spec/json/os9.json Normal file
View File

@ -0,0 +1,12 @@
[
{
"a": "1",
"b": "2",
"c": "3"
},
{
"a": "4",
"b": "5",
"c": "ʤ"
}
]

View File

@ -10,6 +10,7 @@ local function loadFile(textFile)
end end
local files = { local files = {
"bom-os9",
"comma_in_quotes", "comma_in_quotes",
"correctness", "correctness",
"empty", "empty",
@ -22,6 +23,7 @@ local files = {
"json_no_newline", "json_no_newline",
"newlines", "newlines",
"newlines_crlf", "newlines_crlf",
"os9",
"quotes_and_newlines", "quotes_and_newlines",
"quotes_non_escaped", "quotes_non_escaped",
"simple", "simple",