local ftcsv = {
    _VERSION = 'ftcsv 1.1.4',
    _DESCRIPTION = 'CSV library for Lua',
    _URL = 'https://github.com/FourierTransformer/ftcsv',
    _LICENSE = [[
        The MIT License (MIT)

        Copyright (c) 2016 Shakil Thakur

        Permission is hereby granted, free of charge, to any person obtaining a copy
        of this software and associated documentation files (the "Software"), to deal
        in the Software without restriction, including without limitation the rights
        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
        copies of the Software, and to permit persons to whom the Software is
        furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be included in all
        copies or substantial portions of the Software.

        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
        SOFTWARE.
    ]]
}

-- lua 5.1 load compat
local M = {}
if type(jit) == 'table' or _ENV then
    M.load = _G.load
else
    M.load = loadstring
end

-- perf
local sbyte = string.byte
local ssub = string.sub

-- luajit specific speedups
-- luajit performs faster with iterating over string.byte,
-- whereas vanilla lua performs faster with string.find
if type(jit) == 'table' then
    -- finds the end of an escape sequence
    function M.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
        -- local doubleQuoteEscape = doubleQuoteEscape
        local currentChar, nextChar = sbyte(inputString, i), nil
        while i <= inputLength do
            -- print(i)
            nextChar = sbyte(inputString, i+1)

            -- this one deals with " double quotes that are escaped "" within single quotes "
            -- these should be turned into a single quote at the end of the field
            if currentChar == quote and nextChar == quote then
                doubleQuoteEscape = true
                i = i + 2
                currentChar = sbyte(inputString, i)

            -- identifies the escape toggle
            elseif currentChar == quote and nextChar ~= quote then
                -- print("exiting", i-1)
                return i-1, doubleQuoteEscape
            else
                i = i + 1
                currentChar = nextChar
            end
        end
    end

else
    -- vanilla lua closing quote finder
    function M.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
        local firstCharIndex = 1
        local firstChar, iChar = nil, nil
        repeat
            firstCharIndex, i = inputString:find('".?', i+1)
            firstChar = sbyte(inputString, firstCharIndex)
            iChar = sbyte(inputString, i)
            -- nextChar = string.byte(inputString, i+1)
            -- print("HI", offset, i)
            -- print(firstChar, iChar)
            if firstChar == quote and iChar == quote then
                doubleQuoteEscape = true
            end
        until iChar ~= quote
        if i == nil then
            return inputLength - 1, doubleQuoteEscape
        end
        -- print("exiting", i-2)
        return i-2, doubleQuoteEscape
    end
end
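
--[[
    Illustrative sketch (the sample string and indices below are assumptions for
    demonstration, not part of the library API): both implementations of
    M.findClosingQuote return the index of the last character inside the quotes,
    plus a flag saying whether an escaped "" was seen along the way.

        local input = 'a,"b""c",d'
        -- the quoted field opens at index 3, so scanning starts at index 4
        local lastChar, sawEscape = M.findClosingQuote(4, #input, input, string.byte('"'), false)
        -- lastChar == 7 (the 'c' just before the closing quote), sawEscape == true
--]]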

-- load an entire file into memory
local function loadFile(textFile)
    local file = io.open(textFile, "r")
    if not file then error("ftcsv: File not found at " .. textFile) end
    local allLines = file:read("*all")
    file:close()
    return allLines
end

-- creates a new field and adds it to the main table
local function createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
    local field
    -- so, if we just recently de-escaped, we don't want the trailing \"
    if sbyte(inputString, i-1) == quote then
        -- print("Skipping last \"")
        field = ssub(inputString, fieldStart, i-2)
    else
        field = ssub(inputString, fieldStart, i-1)
    end
    if doubleQuoteEscape then
        -- print("QUOTE REPLACE")
        -- print(line[fieldNum])
        field = field:gsub('""', '"')
    end
    return field
end
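
--[[
    Illustrative sketch (values are assumptions for demonstration): when parsing the
    row 'a,"b""c",d', the delimiter after the quoted field is hit at i = 9 with
    fieldStart = 4. The character at i - 1 is the closing quote, so the slice stops
    at i - 2, and the doubled quotes are collapsed:

        createField('a,"b""c",d', string.byte('"'), 4, 9, true)  --> 'b"c'
--]]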

-- main function used to parse
local function parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)

    -- keep track of my chars!
    local currentChar, nextChar = sbyte(inputString, i), nil
    local skipChar = 0
    local field
    local fieldStart = i
    local fieldNum = 1
    local lineNum = 1
    local doubleQuoteEscape = false
    local exit = false

    --bytes
    local CR = sbyte("\r")
    local LF = sbyte("\n")
    local quote = sbyte('"')
    local delimiterByte = sbyte(delimiter)

    local assignValue
    local outResults
    -- the headers haven't been set yet.
    -- aka this is the first run!
    if headerField == nil then
        -- print("this is for headers")
        headerField = {}
        assignValue = function()
            headerField[fieldNum] = field
            return true
        end
    else
        -- print("this is for magic")
        outResults = {}
        outResults[1] = {}
        assignValue = function()
            if not pcall(function()
                outResults[lineNum][headerField[fieldNum]] = field
            end) then
                error('ftcsv: too many columns in row ' .. lineNum)
            end
        end
    end

    -- calculate the initial line count (note: this can include duplicates)
    local headerFieldsExist = {}
    local initialLineCount = 0
    for _, value in pairs(headerField) do
        if not headerFieldsExist[value] and (fieldsToKeep == nil or fieldsToKeep[value]) then
            headerFieldsExist[value] = true
            initialLineCount = initialLineCount + 1
        end
    end

    while i <= inputLength do
        -- go by two chars at a time! currentChar is set at the bottom.
        -- currentChar = string.byte(inputString, i)
        nextChar = sbyte(inputString, i+1)
        -- print(i, string.char(currentChar), string.char(nextChar))

        -- empty string
        if currentChar == quote and nextChar == quote then
            -- print("EMPTY STRING")
            skipChar = 1
            fieldStart = i + 2
            -- print("fs+2:", fieldStart)

        -- identifies the escape toggle.
        -- This can only happen if fields have quotes around them
        -- so the current "start" has to be where a quote character is.
        elseif currentChar == quote and nextChar ~= quote and fieldStart == i then
            -- print("New Quoted Field", i)
            fieldStart = i + 1
            i, doubleQuoteEscape = M.findClosingQuote(i+1, inputLength, inputString, quote, doubleQuoteEscape)
            -- print("I VALUE", i, doubleQuoteEscape)
            skipChar = 1

        -- create some fields if we can!
        elseif currentChar == delimiterByte then
            -- create the new field
            -- print(headerField[fieldNum])
            if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
                field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
                -- print("FIELD", field, "FIELDEND", headerField[fieldNum], lineNum)
                -- outResults[headerField[fieldNum]][lineNum] = field
                assignValue()
            end
            doubleQuoteEscape = false

            fieldNum = fieldNum + 1
            fieldStart = i + 1
            -- print("fs+1:", fieldStart)

        -- newline?!
        elseif (currentChar == CR or currentChar == LF) then
            if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
                -- create the new field
                field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
                -- outResults[headerField[fieldNum]][lineNum] = field
                exit = assignValue()
                if exit then
                    if (currentChar == CR and nextChar == LF) then
                        return headerField, i + 1
                    else
                        return headerField, i
                    end
                end
            end
            doubleQuoteEscape = false

            -- determine how line ends
            if (currentChar == CR and nextChar == LF) then
                -- print("CRLF DETECTED")
                skipChar = 1
                fieldStart = fieldStart + 1
                -- print("fs:", fieldStart)
            end

            -- increment for new line
            if fieldNum < initialLineCount then
                error('ftcsv: too few columns in row ' .. lineNum)
            end
            lineNum = lineNum + 1
            outResults[lineNum] = {}
            fieldNum = 1
            fieldStart = i + 1 + skipChar
            -- print("fs:", fieldStart)
        end

        i = i + 1 + skipChar
        if (skipChar > 0) then
            currentChar = sbyte(inputString, i)
        else
            currentChar = nextChar
        end
        skipChar = 0
    end

    -- create last new field
    if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
        field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
        assignValue()
    end

    -- clean up last line if it's weird (this happens when there is a CRLF newline at end of file)
    -- doing a count gets it to pick up the oddballs
    local finalLineCount = 0
    local lastValue = nil
    for k, v in pairs(outResults[lineNum]) do
        finalLineCount = finalLineCount + 1
        lastValue = v
    end

    -- this indicates a CRLF
    -- print("Final/Initial", finalLineCount, initialLineCount)
    if finalLineCount == 1 and lastValue == "" then
        outResults[lineNum] = nil

    -- otherwise there might not be enough fields in the line
    elseif finalLineCount < initialLineCount then
        error('ftcsv: too few columns in row ' .. lineNum)
    end

    return outResults
end
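
--[[
    Contract sketch (input chosen for illustration): parseString is called twice by
    ftcsv.parse. The first pass (headerField == nil) stops at the first newline and
    returns the header names plus the index of the last character it consumed; the
    second pass takes that header list and returns an array of row tables keyed by
    header name.

        local input = "a,b\r\n1,2\r\n"
        local headerField, i = parseString(input, #input, ",", 1)
        -- headerField == {"a", "b"}; data parsing resumes at i + 1
        local rows = parseString(input, #input, ",", i + 1, headerField)
        -- rows[1] == { a = "1", b = "2" }
--]]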

-- runs the show!
function ftcsv.parse(inputFile, delimiter, options)
    -- delimiter MUST be one character
    assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

    -- OPTIONS yo
    local header = true
    local rename
    local fieldsToKeep = nil
    local loadFromString = false
    local headerFunc
    if options then
        if options.headers ~= nil then
            assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
            header = options.headers
        end
        if options.rename ~= nil then
            assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
            rename = options.rename
        end
        if options.fieldsToKeep ~= nil then
            assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
            local ofieldsToKeep = options.fieldsToKeep
            if ofieldsToKeep ~= nil then
                fieldsToKeep = {}
                for j = 1, #ofieldsToKeep do
                    fieldsToKeep[ofieldsToKeep[j]] = true
                end
            end
            if header == false and options.rename == nil then
                error("ftcsv: fieldsToKeep only works with header-less files when using the 'rename' functionality")
            end
        end
        if options.loadFromString ~= nil then
            assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
            loadFromString = options.loadFromString
        end
        if options.headerFunc ~= nil then
            assert(type(options.headerFunc) == "function", "ftcsv only takes a function value for optional parameter 'headerFunc'. You passed in '" .. tostring(options.headerFunc) .. "' of type '" .. type(options.headerFunc) .. "'.")
            headerFunc = options.headerFunc
        end
    end

    -- handle input via string or file!
    local inputString
    if loadFromString then
        inputString = inputFile
    else
        inputString = loadFile(inputFile)
    end
    local inputLength = #inputString

    -- if they sent in an empty file...
    if inputLength == 0 then
        error('ftcsv: Cannot parse an empty file')
    end

    -- parse through the headers!
    local startLine = 1

    -- check for BOM
    if string.byte(inputString, 1) == 239 and string.byte(inputString, 2) == 187 and string.byte(inputString, 3) == 191 then
        startLine = 4
    end
    local headerField, i = parseString(inputString, inputLength, delimiter, startLine)
    i = i + 1 -- start at the next char

    -- make sure a header isn't empty
    for _, header in ipairs(headerField) do
        if #header == 0 then
            error('ftcsv: Cannot parse a file which contains empty headers')
        end
    end

    -- for files where there aren't headers!
    if header == false then
        i = 0
        for j = 1, #headerField do
            headerField[j] = j
        end
    end

    -- rename fields as needed!
    if rename then
        -- basic rename (["a" = "apple"])
        for j = 1, #headerField do
            if rename[headerField[j]] then
                -- print("RENAMING", headerField[j], rename[headerField[j]])
                headerField[j] = rename[headerField[j]]
            end
        end
        -- files without headers, but with a rename need to be handled too!
        if #rename > 0 then
            for j = 1, #rename do
                headerField[j] = rename[j]
            end
        end
    end

    -- apply some sweet header manipulation
    if headerFunc then
        for j = 1, #headerField do
            headerField[j] = headerFunc(headerField[j])
        end
    end

    local output = parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
    return output
end
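
--[[
    Usage sketch (the file name, header names, and option values are assumptions for
    illustration):

        local ftcsv = require("ftcsv")

        -- parse a file on disk; each row becomes a table keyed by its header
        local people = ftcsv.parse("people.csv", ",")
        for _, person in ipairs(people) do
            print(person.name, person.age)
        end

        -- or parse a raw string, keeping only selected columns
        local rows = ftcsv.parse("name,age\r\nalice,30\r\n", ",",
            { loadFromString = true, fieldsToKeep = { "name" } })
--]]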

-- a function that delimits " to "", used by the writer
local function delimitField(field)
    field = tostring(field)
    if field:find('"') then
        return field:gsub('"', '""')
    else
        return field
    end
end
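
-- e.g. delimitField('say "hi"') --> 'say ""hi""' (plus gsub's match count),
-- ready to sit inside a quoted CSV field; the input here is purely illustrative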

-- a function that compiles some lua code to quickly print out the csv
local function writer(inputTable, delimiter, headers)
    -- they get re-created here if they need to be escaped so lua understands it based on how
    -- they came in
    for i = 1, #headers do
        if inputTable[1][headers[i]] == nil then
            error("ftcsv: the field '" .. headers[i] .. "' doesn't exist in the inputTable")
        end
        if headers[i]:find('"') then
            headers[i] = headers[i]:gsub('"', '\\"')
        end
    end

    local outputFunc = [[
        local state, i = ...
        local d = state.delimitField
        i = i + 1;
        if i > state.tableSize then return nil end;
        return i, '"' .. d(state.t[i]["]] .. table.concat(headers, [["]) .. '"]] .. delimiter .. [["' .. d(state.t[i]["]]) .. [["]) .. '"\r\n']]

    -- print(outputFunc)

    local state = {}
    state.t = inputTable
    state.tableSize = #inputTable
    state.delimitField = delimitField

    return M.load(outputFunc), state, 0
end
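
--[[
    Illustrative sketch (headers and delimiter chosen for demonstration): with
    headers { "a", "b" } and delimiter ",", the compiled iterator body reads roughly:

        local state, i = ...
        local d = state.delimitField
        i = i + 1;
        if i > state.tableSize then return nil end;
        return i, '"' .. d(state.t[i]["a"]) .. '","' .. d(state.t[i]["b"]) .. '"\r\n'
--]]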

-- takes the values from the headers in the first row of the input table
local function extractHeaders(inputTable)
    local headers = {}
    for key, _ in pairs(inputTable[1]) do
        headers[#headers+1] = key
    end

    -- lets make the headers alphabetical
    table.sort(headers)

    return headers
end

-- turns a lua table into a csv
-- works really quickly with luajit-2.1, because table.concat life
function ftcsv.encode(inputTable, delimiter, options)
    local output = {}

    -- delimiter MUST be one character
    assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

    -- grab the headers from the options if they are there
    local headers = nil
    if options then
        if options.fieldsToKeep ~= nil then
            assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
            headers = options.fieldsToKeep
        end
    end
    if headers == nil then
        headers = extractHeaders(inputTable)
    end

    -- newHeaders are needed if there are quotes within the header
    -- because they need to be escaped
    local newHeaders = {}
    for i = 1, #headers do
        if headers[i]:find('"') then
            newHeaders[i] = headers[i]:gsub('"', '""')
        else
            newHeaders[i] = headers[i]
        end
    end
    output[1] = '"' .. table.concat(newHeaders, '"' .. delimiter .. '"') .. '"\r\n'

    -- add each line by line.
    for i, line in writer(inputTable, delimiter, headers) do
        output[i+1] = line
    end
    return table.concat(output)
end
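
--[[
    Usage sketch (table contents are assumptions for illustration; note that when no
    fieldsToKeep option is given, the headers come out sorted alphabetically):

        local rows = {
            { name = "alice", age = "30" },
            { name = "bob",   age = "29" },
        }
        local csv = ftcsv.encode(rows, ",")
        -- csv == '"age","name"\r\n"30","alice"\r\n"29","bob"\r\n'
--]]
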
return ftcsv