mirror of
https://github.com/geoffleyland/lua-csv.git
synced 2024-11-23 01:34:19 +00:00
csv.lua: move the column-map management code into its own class to try to make separated_values_iterator cleaner. Hmmm.
This commit is contained in:
parent
c4f21c0264
commit
fd9d21cb9c
48
lua/csv.lua
48
lua/csv.lua
@ -32,11 +32,14 @@ end
|
|||||||
|
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
local column_map = {}
|
||||||
|
column_map.__index = column_map
|
||||||
|
|
||||||
--- Parse a list of columns.
|
--- Parse a list of columns.
|
||||||
-- The main job here is normalising column names and dealing with columns
|
-- The main job here is normalising column names and dealing with columns
|
||||||
-- for which we have more than one possible name in the header.
|
-- for which we have more than one possible name in the header.
|
||||||
local function build_column_name_map(columns)
|
function column_map:new(columns, filename)
|
||||||
local column_name_map = {}
|
local name_map = {}
|
||||||
for n, v in pairs(columns) do
|
for n, v in pairs(columns) do
|
||||||
local names
|
local names
|
||||||
local t
|
local t
|
||||||
@ -62,11 +65,11 @@ local function build_column_name_map(columns)
|
|||||||
|
|
||||||
t.name = n
|
t.name = n
|
||||||
for _, n in ipairs(names) do
|
for _, n in ipairs(names) do
|
||||||
column_name_map[n:lower()] = t
|
name_map[n:lower()] = t
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return column_name_map
|
return setmetatable({ name_map = name_map, filename = filename }, column_map)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
@ -74,23 +77,23 @@ end
|
|||||||
-- Once we've read the header, work out which columns we're interested in and
|
-- Once we've read the header, work out which columns we're interested in and
|
||||||
-- what to do with them. Mostly this is about checking we've got the columns
|
-- what to do with them. Mostly this is about checking we've got the columns
|
||||||
-- we need and writing a nice complaint if we haven't.
|
-- we need and writing a nice complaint if we haven't.
|
||||||
local function build_column_index_map(header, column_name_map)
|
function column_map:read_header(header)
|
||||||
local column_index_map = {}
|
local index_map = {}
|
||||||
|
|
||||||
-- Match the columns in the file to the columns in the name map
|
-- Match the columns in the file to the columns in the name map
|
||||||
local found = {}
|
local found = {}
|
||||||
for i, word in ipairs(header) do
|
for i, word in ipairs(header) do
|
||||||
word = word:lower():gsub("[^%w%d]+", " "):gsub("^ *(.-) *$", "%1")
|
word = word:lower():gsub("[^%w%d]+", " "):gsub("^ *(.-) *$", "%1")
|
||||||
local r = column_name_map[word]
|
local r = self.name_map[word]
|
||||||
if r then
|
if r then
|
||||||
column_index_map[i] = r
|
index_map[i] = r
|
||||||
found[r.name] = true
|
found[r.name] = true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- check we found all the columns we need
|
-- check we found all the columns we need
|
||||||
local not_found = {}
|
local not_found = {}
|
||||||
for name, r in pairs(column_name_map) do
|
for name, r in pairs(self.name_map) do
|
||||||
if not found[r.name] then
|
if not found[r.name] then
|
||||||
local nf = not_found[r.name]
|
local nf = not_found[r.name]
|
||||||
if nf then
|
if nf then
|
||||||
@ -119,19 +122,19 @@ local function build_column_index_map(header, column_name_map)
|
|||||||
error(table.concat(problems, "\n"), 0)
|
error(table.concat(problems, "\n"), 0)
|
||||||
end
|
end
|
||||||
|
|
||||||
return column_index_map
|
self.index_map = index_map
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
local function transform_field(value, index, map, filename, line, column)
|
function column_map:transform(value, index, line, column)
|
||||||
local field = map[index]
|
local field = self.index_map[index]
|
||||||
if field then
|
if field then
|
||||||
if field.transform then
|
if field.transform then
|
||||||
local ok
|
local ok
|
||||||
ok, value = pcall(field.transform, value)
|
ok, value = pcall(field.transform, value)
|
||||||
if not ok then
|
if not ok then
|
||||||
error(("%s:%d:%d: Couldn't read field '%s': %s"):
|
error(("%s:%d:%d: Couldn't read field '%s': %s"):
|
||||||
format(filename or "<unknown>", line, column,
|
format(self.filename or "<unknown>", line, column,
|
||||||
field.name, value))
|
field.name, value))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -273,7 +276,7 @@ local function separated_values_iterator(buffer, parameters)
|
|||||||
local line_start = 1
|
local line_start = 1
|
||||||
local line = 1
|
local line = 1
|
||||||
local field_count, fields, starts = 0, {}, {}
|
local field_count, fields, starts = 0, {}, {}
|
||||||
local column_index_map, header
|
local header, header_read
|
||||||
|
|
||||||
while true do
|
while true do
|
||||||
local field_start_line = line
|
local field_start_line = line
|
||||||
@ -321,9 +324,9 @@ local function separated_values_iterator(buffer, parameters)
|
|||||||
|
|
||||||
-- Insert the value into the table for this "line"
|
-- Insert the value into the table for this "line"
|
||||||
local key
|
local key
|
||||||
if column_index_map then
|
if parameters.column_map and header_read then
|
||||||
value, key = transform_field(value, field_count, column_index_map,
|
value, key = parameters.column_map:transform(value, field_count,
|
||||||
parameters.filename, field_start_line, field_start_column)
|
field_start_line, field_start_column)
|
||||||
elseif header then
|
elseif header then
|
||||||
key = header[field_count]
|
key = header[field_count]
|
||||||
else
|
else
|
||||||
@ -336,11 +339,12 @@ local function separated_values_iterator(buffer, parameters)
|
|||||||
|
|
||||||
-- if we ended on a newline then yield the fields on this line.
|
-- if we ended on a newline then yield the fields on this line.
|
||||||
if not this_sep or this_sep == "\r" or this_sep == "\n" then
|
if not this_sep or this_sep == "\r" or this_sep == "\n" then
|
||||||
if parameters.column_name_map and not column_index_map then
|
if parameters.column_map and not header_read then
|
||||||
column_index_map =
|
parameters.column_map:read_header(fields)
|
||||||
build_column_index_map(fields, parameters.column_name_map)
|
header_read = true
|
||||||
elseif parameters.header and not header then
|
elseif parameters.header and not header then
|
||||||
header = fields
|
header = fields
|
||||||
|
header_read = true
|
||||||
else
|
else
|
||||||
local k, v = next(fields)
|
local k, v = next(fields)
|
||||||
if v ~= "" or field_count > 1 then -- ignore blank lines
|
if v ~= "" or field_count > 1 then -- ignore blank lines
|
||||||
@ -388,8 +392,8 @@ buffer_mt.__index = buffer_mt
|
|||||||
|
|
||||||
local function use(buffer, parameters)
|
local function use(buffer, parameters)
|
||||||
parameters.filename = parameters.filename or "<unknown>"
|
parameters.filename = parameters.filename or "<unknown>"
|
||||||
parameters.column_name_map = parameters.columns and
|
parameters.column_map = parameters.columns and
|
||||||
build_column_name_map(parameters.columns)
|
column_map:new(parameters.columns, parameters.filename)
|
||||||
local f = { buffer = buffer, parameters = parameters }
|
local f = { buffer = buffer, parameters = parameters }
|
||||||
return setmetatable(f, buffer_mt)
|
return setmetatable(f, buffer_mt)
|
||||||
end
|
end
|
||||||
|
Loading…
Reference in New Issue
Block a user