From 2ad3fb7be9f96d3d2f480ed50987e349dd40a043 Mon Sep 17 00:00:00 2001 From: FourierTransformer Date: Thu, 30 Nov 2017 21:58:36 -0600 Subject: [PATCH 1/4] can now handle os 9 line endings --- ftcsv.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ftcsv.lua b/ftcsv.lua index af4abec..5dea5fb 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -215,7 +215,7 @@ local function parseString(inputString, inputLength, delimiter, i, headerField, -- end -- newline?! - elseif ((currentChar == CR and nextChar == LF) or currentChar == LF) then + elseif (currentChar == CR or currentChar == LF) then if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then -- create the new field field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape) From 6c820e03a12d7ceb954f1d0e1e6ee8f017b942b1 Mon Sep 17 00:00:00 2001 From: FourierTransformer Date: Thu, 30 Nov 2017 22:02:49 -0600 Subject: [PATCH 2/4] version bump --- README.md | 2 +- ftcsv-1.1.3-1.rockspec => ftcsv-1.1.4-1.rockspec | 4 ++-- ftcsv.lua | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename ftcsv-1.1.3-1.rockspec => ftcsv-1.1.4-1.rockspec (95%) diff --git a/README.md b/README.md index e624010..a724e75 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ftcsv, a fairly fast csv library written in pure Lua. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, and 5.3 -It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles both `\n` (LF) and `\r\n` (CRLF) line endings (ie it should work with Windows and Mac/Linux line endings) and has UTF-8 support. +It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. 
It correctly handles both `\n` (LF), `\r` (CR) and `\r\n` (CRLF) line endings (ie it should work with Unix, Mac OS 9, and Windows line endings) and has UTF-8 support. diff --git a/ftcsv-1.1.3-1.rockspec b/ftcsv-1.1.4-1.rockspec similarity index 95% rename from ftcsv-1.1.3-1.rockspec rename to ftcsv-1.1.4-1.rockspec index 8267efb..236ef9d 100644 --- a/ftcsv-1.1.3-1.rockspec +++ b/ftcsv-1.1.4-1.rockspec @@ -1,9 +1,9 @@ package = "ftcsv" -version = "1.1.3-1" +version = "1.1.4-1" source = { url = "git://github.com/FourierTransformer/ftcsv.git", - tag = "1.1.3" + tag = "1.1.4" } description = { diff --git a/ftcsv.lua b/ftcsv.lua index 5dea5fb..2afd833 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -1,5 +1,5 @@ local ftcsv = { - _VERSION = 'ftcsv 1.1.3', + _VERSION = 'ftcsv 1.1.4', _DESCRIPTION = 'CSV library for Lua', _URL = 'https://github.com/FourierTransformer/ftcsv', _LICENSE = [[ From d2ddda79f7bd1da0cbaaeaeb3a9bd55fc1a896bb Mon Sep 17 00:00:00 2001 From: FourierTransformer Date: Thu, 30 Nov 2017 22:46:10 -0600 Subject: [PATCH 3/4] will now strip out BOM --- README.md | 2 +- ftcsv.lua | 8 +++++++- spec/csvs/bom-os9.csv | 1 + spec/csvs/os9.csv | 1 + spec/json/bom-os9.json | 12 ++++++++++++ spec/json/os9.json | 12 ++++++++++++ spec/parse_encode_spec.lua | 2 ++ 7 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 spec/csvs/bom-os9.csv create mode 100644 spec/csvs/os9.csv create mode 100644 spec/json/bom-os9.json create mode 100644 spec/json/os9.json diff --git a/README.md b/README.md index a724e75..75a886a 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ftcsv, a fairly fast csv library written in pure Lua. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, and 5.3 -It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. 
It correctly handles both `\n` (LF), `\r` (CR) and `\r\n` (CRLF) line endings (ie it should work with Unix, Mac OS 9, and Windows line endings) and has UTF-8 support. +It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles both `\n` (LF), `\r` (CR) and `\r\n` (CRLF) line endings (ie it should work with Unix, Mac OS 9, and Windows line endings), strips out the utf BOM (if it exists), and has UTF-8 support. diff --git a/ftcsv.lua b/ftcsv.lua index 2afd833..a3d1d9c 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -347,7 +347,13 @@ function ftcsv.parse(inputFile, delimiter, options) end -- parse through the headers! - local headerField, i = parseString(inputString, inputLength, delimiter, 1) + local startLine = 1 + + -- check for BOM + if string.byte(inputString, 1) == 239 and string.byte(inputString, 2) == 187 and string.byte(inputString, 3) == 191 then + startLine = 4 + end + local headerField, i = parseString(inputString, inputLength, delimiter, startLine) i = i + 1 -- start at the next char -- make sure a header isn't empty diff --git a/spec/csvs/bom-os9.csv b/spec/csvs/bom-os9.csv new file mode 100644 index 0000000..3ea2148 --- /dev/null +++ b/spec/csvs/bom-os9.csv @@ -0,0 +1 @@ +a,b,c 1,2,3 4,5,ʤ \ No newline at end of file diff --git a/spec/csvs/os9.csv b/spec/csvs/os9.csv new file mode 100644 index 0000000..4f06168 --- /dev/null +++ b/spec/csvs/os9.csv @@ -0,0 +1 @@ +a,b,c 1,2,3 4,5,ʤ \ No newline at end of file diff --git a/spec/json/bom-os9.json b/spec/json/bom-os9.json new file mode 100644 index 0000000..8ced204 --- /dev/null +++ b/spec/json/bom-os9.json @@ -0,0 +1,12 @@ +[ + { + "a": "1", + "b": "2", + "c": "3" + }, + { + "a": "4", + "b": "5", + "c": "ʤ" + } +] \ No newline at end of file diff --git a/spec/json/os9.json b/spec/json/os9.json new file mode 100644 index 
0000000..8ced204 --- /dev/null +++ b/spec/json/os9.json @@ -0,0 +1,12 @@ +[ + { + "a": "1", + "b": "2", + "c": "3" + }, + { + "a": "4", + "b": "5", + "c": "ʤ" + } +] \ No newline at end of file diff --git a/spec/parse_encode_spec.lua b/spec/parse_encode_spec.lua index f18c690..b7e1366 100644 --- a/spec/parse_encode_spec.lua +++ b/spec/parse_encode_spec.lua @@ -10,6 +10,7 @@ local function loadFile(textFile) end local files = { + "bom-os9", "comma_in_quotes", "correctness", "empty", @@ -22,6 +23,7 @@ local files = { "json_no_newline", "newlines", "newlines_crlf", + "os9", "quotes_and_newlines", "quotes_non_escaped", "simple", From 111199c94a1e8f6c929fc862996e9b2f9203ac2a Mon Sep 17 00:00:00 2001 From: FourierTransformer Date: Fri, 1 Dec 2017 08:50:16 -0600 Subject: [PATCH 4/4] minor clarifications --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 75a886a..4de6d38 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ftcsv, a fairly fast csv library written in pure Lua. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, and 5.3 -It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles both `\n` (LF), `\r` (CR) and `\r\n` (CRLF) line endings (ie it should work with Unix, Mac OS 9, and Windows line endings), strips out the utf BOM (if it exists), and has UTF-8 support. +It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles `\n` (LF), `\r` (CR), and `\r\n` (CRLF) line endings (i.e. it should work with Unix, Mac OS 9, and Windows line endings) and has UTF-8 support (it will strip out the BOM if it exists).