From ec99b22faeca4740164f4bf1ac480701f2ae0c07 Mon Sep 17 00:00:00 2001 From: Geoff Leyland Date: Mon, 9 Jun 2014 17:58:38 +1200 Subject: [PATCH] skip a UTF-8 or UTF-16 BOM if one is found. There are other BOMs, but this should do for now --- lua/csv.lua | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lua/csv.lua b/lua/csv.lua index ee22b49..972c431 100644 --- a/lua/csv.lua +++ b/lua/csv.lua @@ -263,12 +263,23 @@ local function separated_values_iterator(buffer, parameters) local function field_find(pattern, init) + init = init or 1 local f, l, c = buffer:find(pattern, init + field_start - 1) if not f then return end return f - field_start + 1, l - field_start + 1, c end + -- Is there some kind of Unicode BOM here? + if field_find("^\239\187\191") then -- UTF-8 + advance(3) + elseif field_find("^\254\255") then -- UTF-16 big-endian + advance(2) + elseif field_find("^\255\254") then -- UTF-16 little-endian + advance(2) + end + + -- Start reading the file local sep = guess_separator(buffer, parameters) local line_start = 1