mirror of
https://github.com/geoffleyland/lua-csv.git
synced 2024-11-23 01:34:19 +00:00
tidy BOM handling and add a test
This commit is contained in:
parent
54a7bb2221
commit
22d84c44ee
40
lua/csv.lua
40
lua/csv.lua
@ -240,6 +240,37 @@ local function guess_separator(buffer, parameters)
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Byte-order marks recognised at the very start of the input.
-- Entries are grouped by BOM length in bytes, so find_unicode_BOM only has
-- to extract one leading substring per group and look it up in that group's
-- BOMS set.
local unicode_BOMS =
{
  {
    length = 2,
    BOMS =
    {
      ["\254\255"]      = true, -- UTF-16 big-endian
      ["\255\254"]      = true, -- UTF-16 little-endian
    }
  },
  {
    length = 3,
    BOMS =
    {
      ["\239\187\191"]  = true, -- UTF-8
    }
  }
}


--- Work out how many bytes of Unicode BOM the input starts with.
--  @param sub a function called as `sub(1, n)`; it is expected to return the
--    first n bytes of the input as a string (like `string.sub` would)
--  @return the length in bytes of the BOM that was found, or 0 if the input
--    does not start with any of the BOMs listed in unicode_BOMS
local function find_unicode_BOM(sub)
  for _, x in ipairs(unicode_BOMS) do
    -- One substring per length group, then a set lookup.
    local code = sub(1, x.length)
    if x.BOMS[code] then
      return x.length
    end
  end
  -- No BOM: the caller can advance by 0 bytes unconditionally.
  return 0
end
|
||||||
|
|
||||||
|
|
||||||
--- Iterate through the records in a file
|
--- Iterate through the records in a file
|
||||||
-- Since records might be more than one line (if there's a newline in quotes)
|
-- Since records might be more than one line (if there's a newline in quotes)
|
||||||
-- and line-endings might not be native, we read the file in chunks of
|
-- and line-endings might not be native, we read the file in chunks of
|
||||||
@ -276,14 +307,7 @@ local function separated_values_iterator(buffer, parameters)
|
|||||||
|
|
||||||
|
|
||||||
-- Is there some kind of Unicode BOM here?
|
-- Is there some kind of Unicode BOM here?
|
||||||
if field_sub(1, 3) == "\239\187\191" then -- UTF-8
|
advance(find_unicode_BOM(field_sub))
|
||||||
advance(3)
|
|
||||||
elseif field_sub(1, 2) == "\254\255" then -- UTF-16 big-endian
|
|
||||||
advance(2)
|
|
||||||
elseif field_sub(1, 2) == "\255\254" then -- UTF-16 little-endian
|
|
||||||
advance(2)
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
-- Start reading the file
|
-- Start reading the file
|
||||||
local sep = guess_separator(buffer, parameters)
|
local sep = guess_separator(buffer, parameters)
|
||||||
|
@ -74,6 +74,14 @@ apple:four,charlie:60!]],
|
|||||||
test("../test-data/blank-line.csv", [[
|
test("../test-data/blank-line.csv", [[
|
||||||
this,file,ends,with,a,blank,line!]])
|
this,file,ends,with,a,blank,line!]])
|
||||||
|
|
||||||
|
test("../test-data/BOM.csv", [[
|
||||||
|
apple:one,charlie:30!
|
||||||
|
apple:four,charlie:60!]],
|
||||||
|
{ columns = {
|
||||||
|
apple = { name = "ALPHA", transform = string.lower },
|
||||||
|
charlie = { transform = function(x) return tonumber(x) * 10 end }}})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if errors == 0 then
|
if errors == 0 then
|
||||||
io.stdout:write("Passed\n")
|
io.stdout:write("Passed\n")
|
||||||
|
3
test-data/BOM.csv
Normal file
3
test-data/BOM.csv
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
alpha,bravo,charlie
|
||||||
|
ONE,two,3
|
||||||
|
four,five,6
|
|
Loading…
Reference in New Issue
Block a user