From d2ddda79f7bd1da0cbaaeaeb3a9bd55fc1a896bb Mon Sep 17 00:00:00 2001 From: FourierTransformer Date: Thu, 30 Nov 2017 22:46:10 -0600 Subject: [PATCH] will now strip out BOM --- README.md | 2 +- ftcsv.lua | 8 +++++++- spec/csvs/bom-os9.csv | 1 + spec/csvs/os9.csv | 1 + spec/json/bom-os9.json | 12 ++++++++++++ spec/json/os9.json | 12 ++++++++++++ spec/parse_encode_spec.lua | 2 ++ 7 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 spec/csvs/bom-os9.csv create mode 100644 spec/csvs/os9.csv create mode 100644 spec/json/bom-os9.json create mode 100644 spec/json/os9.json diff --git a/README.md b/README.md index a724e75..75a886a 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ftcsv, a fairly fast csv library written in pure Lua. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, and 5.3 -It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles both `\n` (LF), `\r` (CR) and `\r\n` (CRLF) line endings (ie it should work with Unix, Mac OS 9, and Windows line endings) and has UTF-8 support. +It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles `\n` (LF), `\r` (CR) and `\r\n` (CRLF) line endings (i.e. it should work with Unix, Mac OS 9, and Windows line endings), strips out the UTF-8 BOM (if it exists), and has UTF-8 support. diff --git a/ftcsv.lua b/ftcsv.lua index 2afd833..a3d1d9c 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -347,7 +347,13 @@ function ftcsv.parse(inputFile, delimiter, options) end -- parse through the headers! 
- local headerField, i = parseString(inputString, inputLength, delimiter, 1) + local startLine = 1 + + -- check for BOM + if string.byte(inputString, 1) == 239 and string.byte(inputString, 2) == 187 and string.byte(inputString, 3) == 191 then + startLine = 4 + end + local headerField, i = parseString(inputString, inputLength, delimiter, startLine) i = i + 1 -- start at the next char -- make sure a header isn't empty diff --git a/spec/csvs/bom-os9.csv b/spec/csvs/bom-os9.csv new file mode 100644 index 0000000..3ea2148 --- /dev/null +++ b/spec/csvs/bom-os9.csv @@ -0,0 +1 @@ +a,b,c 1,2,3 4,5,ʤ \ No newline at end of file diff --git a/spec/csvs/os9.csv b/spec/csvs/os9.csv new file mode 100644 index 0000000..4f06168 --- /dev/null +++ b/spec/csvs/os9.csv @@ -0,0 +1 @@ +a,b,c 1,2,3 4,5,ʤ \ No newline at end of file diff --git a/spec/json/bom-os9.json b/spec/json/bom-os9.json new file mode 100644 index 0000000..8ced204 --- /dev/null +++ b/spec/json/bom-os9.json @@ -0,0 +1,12 @@ +[ + { + "a": "1", + "b": "2", + "c": "3" + }, + { + "a": "4", + "b": "5", + "c": "ʤ" + } +] \ No newline at end of file diff --git a/spec/json/os9.json b/spec/json/os9.json new file mode 100644 index 0000000..8ced204 --- /dev/null +++ b/spec/json/os9.json @@ -0,0 +1,12 @@ +[ + { + "a": "1", + "b": "2", + "c": "3" + }, + { + "a": "4", + "b": "5", + "c": "ʤ" + } +] \ No newline at end of file diff --git a/spec/parse_encode_spec.lua b/spec/parse_encode_spec.lua index f18c690..b7e1366 100644 --- a/spec/parse_encode_spec.lua +++ b/spec/parse_encode_spec.lua @@ -10,6 +10,7 @@ local function loadFile(textFile) end local files = { + "bom-os9", "comma_in_quotes", "correctness", "empty", @@ -22,6 +23,7 @@ local files = { "json_no_newline", "newlines", "newlines_crlf", + "os9", "quotes_and_newlines", "quotes_non_escaped", "simple",