csv.lua: track the buffer start with a variable called (surprisingly) buffer_start; field_start and line_start now count from the start of the whole file rather than from the start of the current buffer contents

This commit is contained in:
Geoff Leyland 2014-05-26 18:17:54 +12:00
parent e2ea3d2f1a
commit d9d9e419c7

View File

@ -151,24 +151,23 @@ end
local function separated_values_iterator(file, parameters) local function separated_values_iterator(file, parameters)
local buffer_size = parameters.buffer_size or DEFAULT_BUFFER_SIZE local buffer_size = parameters.buffer_size or DEFAULT_BUFFER_SIZE
local buffer = "" local buffer = ""
local field_start = 1 local buffer_start = 0
local line_start = 1
-- Cut the front off the buffer if we've already read it -- Cut the front off the buffer if we've already read it
local function truncate(p) local function truncate(p)
p = p - buffer_start
if p > buffer_size then if p > buffer_size then
local remove = math.floor((p-1) / buffer_size) * buffer_size local remove = math.floor((p-1) / buffer_size) * buffer_size
buffer = buffer:sub(remove + 1) buffer = buffer:sub(remove + 1)
field_start = field_start - remove buffer_start = buffer_start + remove
line_start = line_start - remove
end end
end end
-- Extend the buffer so we can see more -- Extend the buffer so we can see more
local function extend(offset) local function extend(offset)
local extra = offset - #buffer local extra = offset - #buffer - buffer_start
if extra > 0 then if extra > 0 then
local size = math.ceil(extra / buffer_size) * buffer_size local size = math.ceil(extra / buffer_size) * buffer_size
local s = file:read(size) local s = file:read(size)
@ -182,19 +181,22 @@ local function separated_values_iterator(file, parameters)
local function find(pattern, init) local function find(pattern, init)
local first, last, capture local first, last, capture
while true do while true do
first, last, capture = buffer:find(pattern, init) first, last, capture = buffer:find(pattern, init - buffer_start)
-- if we found nothing, or the last character is at the end of the -- if we found nothing, or the last character is at the end of the
-- buffer (and the match could potentially be longer) then read some -- buffer (and the match could potentially be longer) then read some
-- more. -- more.
if not first or last == #buffer then if not first or last == #buffer then
local s = file:read(buffer_size) local s = file:read(buffer_size)
-- if we read nothing from the file: if not s then
-- - and first is nil, then below we're returning nil if not first then
-- - and last == #buffer, then the capture we found above is good. return
if not s then return first, last, capture end else
return first + buffer_start, last + buffer_start, capture
end
end
buffer = buffer..s buffer = buffer..s
else else
return first, last, capture return first + buffer_start, last + buffer_start, capture
end end
end end
end end
@ -203,11 +205,14 @@ local function separated_values_iterator(file, parameters)
-- Get a substring from the buffer, extending it if necessary -- Get a substring from the buffer, extending it if necessary
local function sub(a, b) local function sub(a, b)
extend(b) extend(b)
return buffer:sub(a, b) b = b == -1 and b or b - buffer_start
return buffer:sub(a - buffer_start, b)
end end
local filename = parameters.filename or "<unknown>" local filename = parameters.filename or "<unknown>"
local field_start = 1
local line_start = 1
local line = 1 local line = 1
local column_name_map = parameters.columns and local column_name_map = parameters.columns and
build_column_name_map(parameters.columns) build_column_name_map(parameters.columns)