-- ftcsv module table: version metadata and license.
-- The module's public functions (parse, parseLine, encode) are attached below.
local ftcsv = {
    _VERSION = 'ftcsv 1.4.0',
    _DESCRIPTION = 'CSV library for Lua',
    _URL = 'https://github.com/FourierTransformer/ftcsv',
    _LICENSE = [[
        The MIT License (MIT)

        Copyright (c) 2016-2023 Shakil Thakur

        Permission is hereby granted, free of charge, to any person obtaining a copy
        of this software and associated documentation files (the "Software"), to deal
        in the Software without restriction, including without limitation the rights
        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
        copies of the Software, and to permit persons to whom the Software is
        furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be included in all
        copies or substantial portions of the Software.

        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
        SOFTWARE.
    ]]
}
-- perf: localize hot stdlib functions (locals are VM registers, globals are table lookups)
local sbyte = string.byte
local ssub = string.sub

-- luajit/lua compatability layer
-- picks the right chunk-loading function for the running VM:
-- `load` handles source strings on LuaJIT and Lua 5.2+, `loadstring` on Lua 5.1.
local luaCompatibility = {}
if type(jit) == 'table' or _ENV then
    -- luajit and lua 5.2+
    luaCompatibility.load = _G.load
else
    -- lua 5.1
    luaCompatibility.load = loadstring
end
-- luajit specific speedups
-- luajit performs faster with iterating over string.byte,
-- whereas vanilla lua performs faster with string.find
if type(jit) == 'table' then
    luaCompatibility.LuaJIT = true

    -- finds the end of an escape sequence (LuaJIT byte-walking variant)
    -- @param i start index (first char after the opening quote)
    -- @param inputLength total bytes to scan
    -- @param inputString the raw CSV text
    -- @param quote byte value of '"'
    -- @param doubleQuoteEscape carried flag: true once an escaped "" is seen
    -- @return index of the last char before the closing quote, plus the flag;
    --         returns nothing (nil) if no closing quote exists in range
    function luaCompatibility.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
        local currentChar, nextChar = sbyte(inputString, i), nil
        while i <= inputLength do
            nextChar = sbyte(inputString, i + 1)

            -- this one deals with " double quotes that are escaped "" within single quotes "
            -- these should be turned into a single quote at the end of the field
            if currentChar == quote and nextChar == quote then
                doubleQuoteEscape = true
                i = i + 2
                currentChar = sbyte(inputString, i)

            -- identifies the escape toggle
            elseif currentChar == quote and nextChar ~= quote then
                return i - 1, doubleQuoteEscape
            else
                i = i + 1
                currentChar = nextChar
            end
        end
    end

else
    luaCompatibility.LuaJIT = false

    -- vanilla lua closing quote finder: jumps between runs of quotes with
    -- string.find, recursing past even-length (fully escaped) runs.
    function luaCompatibility.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
        local j, difference
        i, j = inputString:find('"+', i)
        if j == nil then
            return nil
        end
        difference = j - i
        -- a run of 2+ quotes means at least one escaped "" was present
        if difference >= 1 then doubleQuoteEscape = true end
        -- odd-length run: the last quote of the run is the field's closing quote
        -- candidate is past this run, keep searching
        if difference % 2 == 1 then
            return luaCompatibility.findClosingQuote(j + 1, inputLength, inputString, quote, doubleQuoteEscape)
        end
        return j - 1, doubleQuoteEscape
    end
end
-- determine the real headers as opposed to the header mapping
-- Returns the unique header names, in first-seen order, filtered down to
-- `fieldsToKeep` when that set is provided (nil keeps everything).
local function determineRealHeaders(headerField, fieldsToKeep)
    local realHeaders = {}
    local headerSet = {}
    for i = 1, #headerField do
        if not headerSet[headerField[i]] then
            -- keep the header when no filter is set, or when it is in the filter
            if fieldsToKeep == nil or fieldsToKeep[headerField[i]] then
                table.insert(realHeaders, headerField[i])
                headerSet[headerField[i]] = true
            end
        end
    end
    return realHeaders
end
-- count the number of unique output columns, honoring the optional
-- fieldsToKeep filter (nil means every unique header counts)
local function determineTotalColumnCount(headerField, fieldsToKeep)
    local totalColumnCount = 0
    local headerFieldSet = {}
    for _, header in pairs(headerField) do
        -- count unique columns and
        -- also figure out if it's a field to keep
        if not headerFieldSet[header] and
            (fieldsToKeep == nil or fieldsToKeep[header]) then
            headerFieldSet[header] = true
            totalColumnCount = totalColumnCount + 1
        end
    end
    return totalColumnCount
end
-- compile a __newindex metamethod that pre-populates each new row table with
-- all header keys, so the row's hash part is sized once up front.
-- Returns nil (and the parser falls back to plain tables) when any header
-- contains "]" — such a header could escape the generated long-bracket string.
local function generateHeadersMetamethod(finalHeaders)
    -- if a header field tries to escape, we will simply return nil
    -- the parser will still parse, but wont get the performance benefit of
    -- having headers predefined
    for _, headers in ipairs(finalHeaders) do
        if headers:find("]") then
            return nil
        end
    end

    -- the generated chunk looks like:
    --   local t, k, _ = ...
    --   rawset(t, k, {[ [[h1]] ]=true, [ [[h2]] ]=true})
    local rawSetup = "local t, k, _ = ... \
    rawset(t, k, {[ [[%s]] ]=true})"
    rawSetup = rawSetup:format(table.concat(finalHeaders, "]] ]=true, [ [["))
    return luaCompatibility.load(rawSetup)
end
-- main function used to parse
-- Scans `inputString` from index `i`, splitting on options.delimiter and
-- CR/LF/CRLF, honoring quoted fields (unless options.ignoreQuotes) and
-- doubled-quote escapes. Returns (rows, endIndex, totalColumnCount); in
-- buffered mode it may instead return (rows, lineStart) so the caller can
-- backtrack to the start of an incomplete trailing row.
local function parseString(inputString, i, options)

    -- keep track of my chars!
    local inputLength = options.inputLength or #inputString
    local currentChar, nextChar = sbyte(inputString, i), nil
    local skipChar = 0
    local field
    local fieldStart = i
    local fieldNum = 1
    local lineNum = 1
    local lineStart = i
    local doubleQuoteEscape, emptyIdentified = false, false

    local skipIndex
    local charPatternToSkip = "[" .. options.delimiter .. "\r\n]"

    --bytes
    local CR = sbyte("\r")
    local LF = sbyte("\n")
    local quote = sbyte('"')
    local delimiterByte = sbyte(options.delimiter)

    -- explode most used options
    local headersMetamethod = options.headersMetamethod
    local fieldsToKeep = options.fieldsToKeep
    local ignoreQuotes = options.ignoreQuotes
    local headerField = options.headerField
    local endOfFile = options.endOfFile
    local buffered = options.buffered

    local outResults = {}

    -- in the first run, the headers haven't been set yet.
    if headerField == nil then
        headerField = {}
        -- setup a metatable to simply return the key that's passed in
        local headerMeta = {__index = function(_, key) return key end}
        setmetatable(headerField, headerMeta)
    end

    if headersMetamethod then
        setmetatable(outResults, {__newindex = headersMetamethod})
    end
    outResults[1] = {}

    -- totalColumnCount based on unique headers and fieldsToKeep
    local totalColumnCount = options.totalColumnCount or determineTotalColumnCount(headerField, fieldsToKeep)

    -- closes out the field currently being scanned ([fieldStart, i-1]),
    -- un-escaping "" and storing it under the mapped header name
    local function assignValueToField()
        if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then

            -- create new field; drop the closing quote when the field was quoted
            if ignoreQuotes == false and sbyte(inputString, i - 1) == quote then
                field = ssub(inputString, fieldStart, i - 2)
            else
                field = ssub(inputString, fieldStart, i - 1)
            end
            if doubleQuoteEscape then
                field = field:gsub('""', '"')
            end

            -- reset flags
            doubleQuoteEscape = false
            emptyIdentified = false

            -- assign field in output
            if headerField[fieldNum] ~= nil then
                outResults[lineNum][headerField[fieldNum]] = field
            else
                error('ftcsv: too many columns in row ' .. options.rowOffset + lineNum)
            end
        end
    end

    while i <= inputLength do
        -- go by two chars at a time,
        -- currentChar is set at the bottom.
        nextChar = sbyte(inputString, i + 1)

        -- empty string
        if ignoreQuotes == false and currentChar == quote and nextChar == quote then
            skipChar = 1
            fieldStart = i + 2
            emptyIdentified = true

        -- escape toggle.
        -- This can only happen if fields have quotes around them
        -- so the current "start" has to be where a quote character is.
        elseif ignoreQuotes == false and currentChar == quote and nextChar ~= quote and fieldStart == i then
            fieldStart = i + 1
            -- if an empty field was identified before assignment, it means
            -- that this is a quoted field that starts with escaped quotes
            -- ex: """a"""
            if emptyIdentified then
                fieldStart = fieldStart - 2
                emptyIdentified = false
            end
            skipChar = 1
            i, doubleQuoteEscape = luaCompatibility.findClosingQuote(i + 1, inputLength, inputString, quote, doubleQuoteEscape)

        -- create some fields
        elseif currentChar == delimiterByte then
            assignValueToField()

            -- increaseFieldIndices
            fieldNum = fieldNum + 1
            fieldStart = i + 1

        -- newline
        elseif (currentChar == LF or currentChar == CR) then
            assignValueToField()

            -- handle CRLF
            if (currentChar == CR and nextChar == LF) then
                skipChar = 1
                fieldStart = fieldStart + 1
            end

            -- incrememnt for new line
            if fieldNum < totalColumnCount then
                -- sometimes in buffered mode, the buffer starts with a newline
                -- this skips the newline and lets the parsing continue.
                if buffered and lineNum == 1 and fieldNum == 1 and field == "" then
                    fieldStart = i + 1 + skipChar
                    lineStart = fieldStart
                else
                    error('ftcsv: too few columns in row ' .. options.rowOffset + lineNum)
                end
            else
                lineNum = lineNum + 1
                outResults[lineNum] = {}
                fieldNum = 1
                fieldStart = i + 1 + skipChar
                lineStart = fieldStart
            end

        -- vanilla lua: jump ahead to the next interesting char via string.find
        elseif luaCompatibility.LuaJIT == false then
            skipIndex = inputString:find(charPatternToSkip, i)
            if skipIndex then
                skipChar = skipIndex - i - 1
            end

        end

        -- in buffered mode and it can't find the closing quote
        -- it usually means in the middle of a buffer and need to backtrack
        if i == nil then
            if buffered then
                outResults[lineNum] = nil
                return outResults, lineStart
            else
                error("ftcsv: can't find closing quote in row " .. options.rowOffset + lineNum ..
                    ". Try running with the option ignoreQuotes=true if the source incorrectly uses quotes.")
            end
        end

        -- Increment Counter
        i = i + 1 + skipChar
        if (skipChar > 0) then
            currentChar = sbyte(inputString, i)
        else
            currentChar = nextChar
        end
        skipChar = 0
    end

    -- mid-stream buffer: drop the (possibly partial) last row and let the
    -- caller re-parse it with more input
    if buffered and not endOfFile then
        outResults[lineNum] = nil
        return outResults, lineStart
    end

    -- create last new field
    assignValueToField()

    -- remove last field if empty
    if fieldNum < totalColumnCount then
        -- indicates last field was really just a CRLF,
        -- so, it can be removed
        if fieldNum == 1 and field == "" then
            outResults[lineNum] = nil
        else
            error('ftcsv: too few columns in row ' .. options.rowOffset + lineNum)
        end
    end

    return outResults, i, totalColumnCount
end
-- post-process the raw header row per the user's options:
-- headers=false turns headers into positional indices; `rename` remaps names
-- (by key for named headers, by position for header-less files); `headerFunc`
-- is applied to every resulting header. Mutates and returns `headerField`.
local function handleHeaders(headerField, options)
    -- for files where there aren't headers!
    if options.headers == false then
        for j = 1, #headerField do
            headerField[j] = j
        end
    else
        -- make sure a header isn't empty if there are headers
        for _, headerName in ipairs(headerField) do
            if #headerName == 0 then
                error('ftcsv: Cannot parse a file which contains empty headers')
            end
        end
    end

    -- rename fields as needed!
    if options.rename then
        -- basic rename (["a" = "apple"])
        for j = 1, #headerField do
            if options.rename[headerField[j]] then
                headerField[j] = options.rename[headerField[j]]
            end
        end
        -- files without headers, but with a options.rename need to be handled too!
        if #options.rename > 0 then
            for j = 1, #options.rename do
                headerField[j] = options.rename[j]
            end
        end
    end

    -- apply some sweet header manipulation
    if options.headerFunc then
        for j = 1, #headerField do
            headerField[j] = options.headerFunc(headerField[j])
        end
    end

    return headerField
end
-- load an entire file into memory
-- Reads `amount` ("*all" or a byte count) from `textFile`. The handle is
-- closed only for "*all"; otherwise it is returned open for buffered reads.
local function loadFile(textFile, amount)
    local file = io.open(textFile, "r")
    if not file then error("ftcsv: File not found at " .. textFile) end
    local lines = file:read(amount)
    if amount == "*all" then
        file:close()
    end
    return lines, file
end
-- resolve the parser's input: either the string itself (loadFromString)
-- or the contents of the file at `inputFile`. Errors on empty input.
-- Returns (inputString, fileHandleOrNil).
local function initializeInputFromStringOrFile(inputFile, options, amount)
    -- handle input via string or file!
    local inputString, file
    if options.loadFromString then
        inputString = inputFile
    else
        inputString, file = loadFile(inputFile, amount)
    end

    -- if they sent in an empty file...
    if inputString == "" then
        error('ftcsv: Cannot parse an empty file')
    end
    return inputString, file
end
-- normalize the public-API arguments: callers may pass (delimiter, options)
-- in the classic order, a single options table with a .delimiter field,
-- or nothing (defaults to comma). Returns (delimiter, options).
local function determineArgumentOrder(delimiter, options)
    -- backwards compatibile layer
    if type(delimiter) == "string" then
        return delimiter, options
    -- the new format for parseLine
    elseif type(delimiter) == "table" then
        local realDelimiter = delimiter.delimiter or ","
        return realDelimiter, delimiter
    -- if nothing is specified, assume "," delimited and call it a day!
    else
        return ",", nil
    end
end
-- validate the user-supplied options table (types and combinations),
-- fill in defaults when no table is given, and expand fieldsToKeep from a
-- list into a set. `fromParseLine` gates the bufferSize option, which only
-- makes sense for streaming. Returns (options, fieldsToKeepSetOrNil).
local function parseOptions(delimiter, options, fromParseLine)
    -- delimiter MUST be one character
    assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

    local fieldsToKeep = nil

    if options then
        if options.headers ~= nil then
            assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
        end

        if options.rename ~= nil then
            assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
        end

        if options.fieldsToKeep ~= nil then
            assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
            local ofieldsToKeep = options.fieldsToKeep
            if ofieldsToKeep ~= nil then
                -- turn the list into a set for O(1) membership checks
                fieldsToKeep = {}
                for j = 1, #ofieldsToKeep do
                    fieldsToKeep[ofieldsToKeep[j]] = true
                end
            end
            -- header-less files only have positional keys, so fieldsToKeep
            -- can only match after a rename gives the columns names
            if options.headers == false and options.rename == nil then
                error("ftcsv: fieldsToKeep only works with header-less files when using the 'rename' functionality")
            end
        end

        if options.loadFromString ~= nil then
            assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
        end

        if options.headerFunc ~= nil then
            assert(type(options.headerFunc) == "function", "ftcsv only takes a function value for optional parameter 'headerFunc'. You passed in '" .. tostring(options.headerFunc) .. "' of type '" .. type(options.headerFunc) .. "'.")
        end

        if options.ignoreQuotes == nil then
            options.ignoreQuotes = false
        else
            assert(type(options.ignoreQuotes) == "boolean", "ftcsv only takes a boolean value for optional parameter 'ignoreQuotes'. You passed in '" .. tostring(options.ignoreQuotes) .. "' of type '" .. type(options.ignoreQuotes) .. "'.")
        end

        if fromParseLine == true then
            if options.bufferSize == nil then
                options.bufferSize = 2^16
            else
                assert(type(options.bufferSize) == "number", "ftcsv only takes a number value for optional parameter 'bufferSize'. You passed in '" .. tostring(options.bufferSize) .. "' of type '" .. type(options.bufferSize) .. "'.")
            end
        else
            if options.bufferSize ~= nil then
                error("ftcsv: bufferSize can only be specified using 'parseLine'. When using 'parse', the entire file is read into memory")
            end
        end

    else
        options = {
            ["headers"] = true,
            ["loadFromString"] = false,
            ["ignoreQuotes"] = false,
            ["bufferSize"] = 2^16
        }
    end

    return options, fieldsToKeep
end
-- find the byte index of the newline that terminates the header row,
-- skipping newlines that appear inside quoted fields. For CRLF the index of
-- the LF is returned. Errors when the header row does not fit in the buffer
-- (unless `entireFile` says the whole input is already in memory).
local function findEndOfHeaders(str, entireFile)
    local i = 1
    local quote = sbyte('"')
    local newlines = {
        [sbyte("\n")] = true,
        [sbyte("\r")] = true
    }
    local quoted = false
    local char = sbyte(str, i)
    repeat
        -- this should still work for escaped quotes
        -- ex: "a""b\r\n" -- there is always a pair around the newline
        if char == quote then
            quoted = not quoted
        end
        i = i + 1
        char = sbyte(str, i)
    until (newlines[char] and not quoted) or char == nil

    if not entireFile and char == nil then
        error("ftcsv: bufferSize needs to be larger to parse this file")
    end

    local nextChar = sbyte(str, i + 1)
    if nextChar == sbyte("\n") and char == sbyte("\r") then
        i = i + 1
    end
    return i
end
-- return the 1-based index where real content starts: 4 when the input
-- begins with a UTF-8 byte-order mark, 1 otherwise
local function determineBOMOffset(inputString)
    -- BOM files start with bytes 239, 187, 191
    if sbyte(inputString, 1) == 239
        and sbyte(inputString, 2) == 187
        and sbyte(inputString, 3) == 191 then
        return 4
    else
        return 1
    end
end
-- parse just the header row and build the parser-argument table used for
-- the body of the file. Returns (endOfHeaders, parserArgs, finalHeaders):
-- the index where data rows begin (the header start itself when headers=false),
-- the options table parseString expects, and the real output header list.
local function parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, entireFile)
    local startLine = determineBOMOffset(inputString)
    local endOfHeaderRow = findEndOfHeaders(inputString, entireFile)

    local parserArgs = {
        delimiter = delimiter,
        headerField = nil,
        fieldsToKeep = nil,
        inputLength = endOfHeaderRow,
        buffered = false,
        ignoreQuotes = options.ignoreQuotes,
        rowOffset = 0
    }

    local rawHeaders, endOfHeaders = parseString(inputString, startLine, parserArgs)

    -- manipulate the headers as per the options
    local modifiedHeaders = handleHeaders(rawHeaders[1], options)
    parserArgs.headerField = modifiedHeaders
    parserArgs.fieldsToKeep = fieldsToKeep
    parserArgs.inputLength = nil

    -- header-less files: the "header" row is actually data, re-read it
    if options.headers == false then endOfHeaders = startLine end

    local finalHeaders = determineRealHeaders(modifiedHeaders, fieldsToKeep)
    if options.headers ~= false then
        local headersMetamethod = generateHeadersMetamethod(finalHeaders)
        parserArgs.headersMetamethod = headersMetamethod
    end

    return endOfHeaders, parserArgs, finalHeaders
end
-- runs the show!
-- Parse an entire CSV (file path, or raw string with loadFromString=true)
-- into a list of row tables keyed by header. Returns (rows, headers).
function ftcsv.parse(inputFile, delimiter, options)
    local delimiter, options = determineArgumentOrder(delimiter, options)
    local options, fieldsToKeep = parseOptions(delimiter, options, false)

    local inputString = initializeInputFromStringOrFile(inputFile, options, "*all")

    local endOfHeaders, parserArgs, finalHeaders = parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, true)

    local output = parseString(inputString, endOfHeaders, parserArgs)

    return output, finalHeaders
end
-- total size of an open file handle in bytes, restoring the read position
local function getFileSize(file)
    local current = file:seek()
    local size = file:seek("end")
    file:seek("set", current)
    return size
end
-- true when the handle's read position has reached (or passed) fileSize
local function determineAtEndOfFile(file, fileSize)
    if file:seek() >= fileSize then
        return true
    else
        return false
    end
end
-- open the input for streaming (parseLine); string input is not supported
-- in streaming mode, so reject loadFromString up front
local function initializeInputFile(inputString, options)
    if options.loadFromString == true then
        error("ftcsv: parseLine currently doesn't support loading from string")
    end
    return initializeInputFromStringOrFile(inputString, options, options.bufferSize)
end
-- stream a CSV file row-by-row without loading it all into memory.
-- Returns an iterator producing (rowNumber, rowTable); reads the file in
-- options.bufferSize chunks, re-parsing leftover partial rows as more input
-- arrives. Errors if a single row exceeds the buffer size.
function ftcsv.parseLine(inputFile, delimiter, userOptions)
    local delimiter, userOptions = determineArgumentOrder(delimiter, userOptions)
    local options, fieldsToKeep = parseOptions(delimiter, userOptions, true)
    local inputString, file = initializeInputFile(inputFile, options)

    local fileSize, atEndOfFile = 0, false
    fileSize = getFileSize(file)
    atEndOfFile = determineAtEndOfFile(file, fileSize)

    local endOfHeaders, parserArgs, _ = parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, atEndOfFile)
    parserArgs.buffered = true
    parserArgs.endOfFile = atEndOfFile

    local parsedBuffer, endOfParsedInput, totalColumnCount = parseString(inputString, endOfHeaders, parserArgs)
    parserArgs.totalColumnCount = totalColumnCount

    -- keep only the unparsed tail of the buffer
    inputString = ssub(inputString, endOfParsedInput)
    local bufferIndex, returnedRowsCount = 0, 0
    local currentRow, buffer

    return function()
        -- check parsed buffer for value
        bufferIndex = bufferIndex + 1
        currentRow = parsedBuffer[bufferIndex]
        if currentRow then
            returnedRowsCount = returnedRowsCount + 1
            return returnedRowsCount, currentRow
        end

        -- read more of the input
        buffer = file:read(options.bufferSize)
        if not buffer then
            file:close()
            return nil
        else
            parserArgs.endOfFile = determineAtEndOfFile(file, fileSize)
        end

        -- appends the new input to what was left over
        inputString = inputString .. buffer

        -- re-analyze and load buffer
        parserArgs.rowOffset = returnedRowsCount
        parsedBuffer, endOfParsedInput = parseString(inputString, 1, parserArgs)
        bufferIndex = 1

        -- cut the input string down
        inputString = ssub(inputString, endOfParsedInput)

        if #parsedBuffer == 0 then
            error("ftcsv: bufferSize needs to be larger to parse this file")
        end

        returnedRowsCount = returnedRowsCount + 1
        return returnedRowsCount, parsedBuffer[bufferIndex]
    end
end
-- The ENCODER code is below here
-- This could be broken out, but is kept here for portability

-- escape a field for CSV output by doubling embedded quotes.
-- Non-string values are stringified first. (gsub is parenthesized so the
-- match count is not leaked as a second return value.)
local function delimitField(field)
    field = tostring(field)
    if field:find('"') then
        return (field:gsub('"', '""'))
    else
        return field
    end
end
-- build a field-escaper for "only required quotes" mode: quotes (and
-- escapes) a field only when it contains a quote, a newline, or the
-- delimiter; otherwise the field is emitted bare.
local function generateDelimitAndQuoteField(delimiter)
    local generatedFunction = function(field)
        field = tostring(field)
        if field:find('"') then
            return '"' .. field:gsub('"', '""') .. '"'
        elseif field:find('[\n' .. delimiter .. ']') then
            return '"' .. field .. '"'
        else
            return field
        end
    end
    return generatedFunction
end
-- escape double quotes in header names with a backslash so they can be
-- embedded inside the double-quoted Lua source built by csvLineGenerator
local function escapeHeadersForLuaGenerator(headers)
    local escapedHeaders = {}
    for i = 1, #headers do
        if headers[i]:find('"') then
            escapedHeaders[i] = headers[i]:gsub('"', '\\"')
        else
            escapedHeaders[i] = headers[i]
        end
    end
    return escapedHeaders
end
-- a function that compiles some lua code to quickly print out the csv
-- Builds an iterator-style function over `inputTable`: each call returns
-- (i, csvLine) for row i, formatting every column through args.delimitField.
-- Two generated variants exist: the default always-quoted output, and the
-- onlyRequiredQuotes variant which leaves quoting to the field escaper.
local function csvLineGenerator(inputTable, delimiter, headers, options)
    local escapedHeaders = escapeHeadersForLuaGenerator(headers)

    local outputFunc = [[
        local args, i = ...
        i = i + 1;
        if i > ]] .. #inputTable .. [[ then return nil end;
        return i, '"' .. args.delimitField(args.t[i]["]] ..
            table.concat(escapedHeaders, [["]) .. '"]] ..
            delimiter .. [["' .. args.delimitField(args.t[i]["]]) ..
            [["]) .. '"\r\n']]

    if options and options.onlyRequiredQuotes == true then
        outputFunc = [[
            local args, i = ...
            i = i + 1;
            if i > ]] .. #inputTable .. [[ then return nil end;
            return i, args.delimitField(args.t[i]["]] ..
                table.concat(escapedHeaders, [["]) .. ']] ..
                delimiter .. [[' .. args.delimitField(args.t[i]["]]) ..
                [["]) .. '\r\n']]
    end

    local arguments = {}
    arguments.t = inputTable
    -- we want to use the same delimitField throughout,
    -- so we're just going to pass it in
    if options and options.onlyRequiredQuotes == true then
        arguments.delimitField = generateDelimitAndQuoteField(delimiter)
    else
        arguments.delimitField = delimitField
    end

    return luaCompatibility.load(outputFunc), arguments, 0
end
-- ensure every requested output header exists in the first row of the
-- table being encoded; errors with the offending header name otherwise
local function validateHeaders(headers, inputTable)
    for i = 1, #headers do
        if inputTable[1][headers[i]] == nil then
            error("ftcsv: the field '" .. headers[i] .. "' doesn't exist in the inputTable")
        end
    end
end
-- build the output buffer seeded with the CRLF-terminated header line;
-- headers are wrapped in quotes unless onlyRequiredQuotes is set (in which
-- case the escaper already quoted whatever needed it)
local function initializeOutputWithEscapedHeaders(escapedHeaders, delimiter, options)
    local output = {}
    if options and options.onlyRequiredQuotes == true then
        output[1] = table.concat(escapedHeaders, delimiter) .. '\r\n'
    else
        output[1] = '"' .. table.concat(escapedHeaders, '"' .. delimiter .. '"') .. '"\r\n'
    end
    return output
end
-- run every header name through the active field escaper (the quote-only
-- variant when onlyRequiredQuotes is set) before building the header line
local function escapeHeadersForOutput(headers, delimiter, options)
    local escapedHeaders = {}
    -- shadow the module-level escaper so we can swap it per-options
    local delimitField = delimitField
    if options and options.onlyRequiredQuotes == true then
        delimitField = generateDelimitAndQuoteField(delimiter)
    end
    for i = 1, #headers do
        escapedHeaders[i] = delimitField(headers[i])
    end
    return escapedHeaders
end
-- derive the output headers from the keys of the first row,
-- sorted alphabetically so the column order is deterministic
local function extractHeadersFromTable(inputTable)
    local headers = {}
    for key, _ in pairs(inputTable[1]) do
        headers[#headers + 1] = key
    end

    -- lets make the headers alphabetical
    table.sort(headers)
    return headers
end
-- pull the explicit header list out of the encode options (fieldsToKeep),
-- validating its type; returns nil when the caller gave none
local function getHeadersFromOptions(options)
    local headers = nil
    if options then
        if options.fieldsToKeep ~= nil then
            assert(
                type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
            headers = options.fieldsToKeep
        end
    end
    return headers
end
-- set up everything encode needs: resolve the header list (explicit
-- fieldsToKeep or derived from the first row), validate it against the
-- data, and seed the output buffer with the escaped header line.
-- Returns (outputBuffer, headers).
local function initializeGenerator(inputTable, delimiter, options)
    -- delimiter MUST be one character
    assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

    local headers = getHeadersFromOptions(options)
    if headers == nil then
        headers = extractHeadersFromTable(inputTable)
    end
    validateHeaders(headers, inputTable)

    local escapedHeaders = escapeHeadersForOutput(headers, delimiter, options)
    local output = initializeOutputWithEscapedHeaders(escapedHeaders, delimiter, options)
    return output, headers
end
-- works really quickly with luajit-2.1, because table.concat life
-- Encode a list of row tables into a CRLF-terminated CSV string.
-- Accepts (inputTable, delimiter, options) or (inputTable, optionsTable).
function ftcsv.encode(inputTable, delimiter, options)
    local delimiter, options = determineArgumentOrder(delimiter, options)
    local output, headers = initializeGenerator(inputTable, delimiter, options)

    for i, line in csvLineGenerator(inputTable, delimiter, headers, options) do
        output[i + 1] = line
    end

    -- combine and return final string
    return table.concat(output)
end
-- expose the module table
return ftcsv