diff --git a/README.md b/README.md index 5329f14..96ee04f 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ftcsv, a fairly fast csv library written in pure Lua. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, and 5.3 -It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles both `\n` (LF) and `\r\n` (CRLF) line endings (ie it should work with Windows and Mac/Linux line endings) and has UTF-8 support. +It works well for CSVs that can easily be fully loaded into memory (easily up to a hundred MB). Currently, there isn't a "large" file mode with proper readers and writers for ingesting CSVs in bulk with a fixed amount of memory. It correctly handles `\n` (LF), `\r` (CR), and `\r\n` (CRLF) line endings (i.e. it should work with Unix, Mac OS 9, and Windows line endings), and has UTF-8 support (it will strip out BOM if it exists). diff --git a/ftcsv-1.1.3-1.rockspec b/ftcsv-1.1.4-1.rockspec similarity index 95% rename from ftcsv-1.1.3-1.rockspec rename to ftcsv-1.1.4-1.rockspec index 8267efb..236ef9d 100644 --- a/ftcsv-1.1.3-1.rockspec +++ b/ftcsv-1.1.4-1.rockspec @@ -1,9 +1,9 @@ package = "ftcsv" -version = "1.1.3-1" +version = "1.1.4-1" source = { url = "git://github.com/FourierTransformer/ftcsv.git", - tag = "1.1.3" + tag = "1.1.4" } description = { diff --git a/ftcsv.lua b/ftcsv.lua index 787f007..760456a 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -1,5 +1,5 @@ local ftcsv = { - _VERSION = 'ftcsv 1.1.3', + _VERSION = 'ftcsv 1.1.4', _DESCRIPTION = 'CSV library for Lua', _URL = 'https://github.com/FourierTransformer/ftcsv', _LICENSE = [[ @@ -215,7 +215,7 @@ local function parseString(inputString, inputLength, delimiter, i, headerField, -- end -- newline?! 
- elseif ((currentChar == CR and nextChar == LF) or currentChar == LF) then + elseif (currentChar == CR or currentChar == LF) then if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then -- create the new field field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape) @@ -347,7 +347,13 @@ function ftcsv.parse(inputFile, delimiter, options) end -- parse through the headers! - local headerField, i = parseString(inputString, inputLength, delimiter, 1) + local startLine = 1 + + -- check for BOM + if string.byte(inputString, 1) == 239 and string.byte(inputString, 2) == 187 and string.byte(inputString, 3) == 191 then + startLine = 4 + end + local headerField, i = parseString(inputString, inputLength, delimiter, startLine) i = i + 1 -- start at the next char -- make sure a header isn't empty diff --git a/spec/csvs/bom-os9.csv b/spec/csvs/bom-os9.csv new file mode 100644 index 0000000..3ea2148 --- /dev/null +++ b/spec/csvs/bom-os9.csv @@ -0,0 +1 @@ +a,b,c 1,2,3 4,5,ʤ \ No newline at end of file diff --git a/spec/csvs/os9.csv b/spec/csvs/os9.csv new file mode 100644 index 0000000..4f06168 --- /dev/null +++ b/spec/csvs/os9.csv @@ -0,0 +1 @@ +a,b,c 1,2,3 4,5,ʤ \ No newline at end of file diff --git a/spec/json/bom-os9.json b/spec/json/bom-os9.json new file mode 100644 index 0000000..8ced204 --- /dev/null +++ b/spec/json/bom-os9.json @@ -0,0 +1,12 @@ +[ + { + "a": "1", + "b": "2", + "c": "3" + }, + { + "a": "4", + "b": "5", + "c": "ʤ" + } +] \ No newline at end of file diff --git a/spec/json/os9.json b/spec/json/os9.json new file mode 100644 index 0000000..8ced204 --- /dev/null +++ b/spec/json/os9.json @@ -0,0 +1,12 @@ +[ + { + "a": "1", + "b": "2", + "c": "3" + }, + { + "a": "4", + "b": "5", + "c": "ʤ" + } +] \ No newline at end of file diff --git a/spec/parse_encode_spec.lua b/spec/parse_encode_spec.lua index f18c690..b7e1366 100644 --- a/spec/parse_encode_spec.lua +++ b/spec/parse_encode_spec.lua @@ -10,6 +10,7 @@ local function loadFile(textFile) 
end local files = { + "bom-os9", "comma_in_quotes", "correctness", "empty", @@ -22,6 +23,7 @@ local files = { "json_no_newline", "newlines", "newlines_crlf", + "os9", "quotes_and_newlines", "quotes_non_escaped", "simple",