From e34c08b772a1e59d715af43812989f8d72cd3731 Mon Sep 17 00:00:00 2001 From: FourierTransformer Date: Sat, 19 Mar 2016 13:27:47 -0500 Subject: [PATCH] major vanilla lua speedups --- ftcsv.lua | 84 ++++++++++++++++++++++++++------------ spec/parse_encode_spec.lua | 1 + 2 files changed, 58 insertions(+), 27 deletions(-) diff --git a/ftcsv.lua b/ftcsv.lua index 62a1eee..13b8396 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -27,7 +27,7 @@ local ftcsv = { ]] } --- lua 5.1 compat +-- lua 5.1 load compat local M = {} if type(jit) == 'table' or _ENV then M.load = _G.load @@ -35,6 +35,61 @@ else M.load = loadstring end +-- luajit specific speedups +-- luajit performs faster with iterating over string.byte, +-- whereas vanilla lua performs faster with string.find +if type(jit) == 'table' then + -- finds the end of an escape sequence + function M.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape) + -- local doubleQuoteEscape = doubleQuoteEscape + local currentChar, nextChar = string.byte(inputString, i), nil + while i <= inputLength do + -- print(i) + nextChar = string.byte(inputString, i+1) + + -- this one deals with " double quotes that are escaped "" within single quotes " + -- these should be turned into a single quote at the end of the field + if currentChar == quote and nextChar == quote then + doubleQuoteEscape = true + i = i + 2 + currentChar = string.byte(inputString, i) + + -- identifies the escape toggle + elseif currentChar == quote and nextChar ~= quote then + -- print("exiting", i-1) + return i-1, doubleQuoteEscape + else + i = i + 1 + currentChar = nextChar + end + end + end + +else + -- vanilla lua closing quote finder + function M.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape) + local firstCharIndex = 1 + local firstChar, iChar = nil, nil + repeat + firstCharIndex, i = inputString:find('".?', i+1) + firstChar = string.byte(inputString, firstCharIndex) + iChar = string.byte(inputString, i) + -- nextChar = string.byte(inputString, i+1) + -- print("HI", offset, i) + -- print(firstChar, iChar) + if firstChar == quote and iChar == quote then + doubleQuoteEscape = true + end + until iChar ~= quote + if i == nil then + return inputLength-1, doubleQuoteEscape + end + -- print("exiting", i-2) + return i-2, doubleQuoteEscape + end + +end + -- load an entire file into memory local function loadFile(textFile) local file = io.open(textFile, "r") @@ -44,31 +99,6 @@ local function loadFile(textFile) return allLines end --- finds the end of an escape sequence -local function findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape) - -- local doubleQuoteEscape = doubleQuoteEscape - local currentChar, nextChar = string.byte(inputString, i), nil - while i <= inputLength do - -- print(i) - nextChar = string.byte(inputString, i+1) - - -- this one deals with " double quotes that are escaped "" within single quotes " - -- these should be turned into a single quote at the end of the field - if currentChar == quote and nextChar == quote then - doubleQuoteEscape = true - i = i + 2 - currentChar = string.byte(inputString, i) - - -- identifies the escape toggle - elseif currentChar == quote and nextChar ~= quote then - return i-1, doubleQuoteEscape - else - i = i + 1 - currentChar = nextChar - end - end -end - -- creates a new field and adds it to the main table local function createNewField(inputString, quote, fieldStart, i, line, fieldNum, doubleQuoteEscape, fieldsToKeep) -- print(lineNum, fieldNum, fieldStart, i-1) @@ -193,7 +223,7 @@ function ftcsv.parse(inputFile, delimiter, options) elseif currentChar == quote and nextChar ~= quote then -- print("ESCAPE TOGGLE") fieldStart = i + 1 - i, doubleQuoteEscape = findClosingQuote(i+1, inputLength, inputString, quote, doubleQuoteEscape) + i, doubleQuoteEscape = M.findClosingQuote(i+1, inputLength, inputString, quote, doubleQuoteEscape) -- print("I VALUE", i, doubleQuoteEscape) skipChar = 1 -- end diff --git a/spec/parse_encode_spec.lua b/spec/parse_encode_spec.lua index 455f39f..b336c01 100644 --- a/spec/parse_encode_spec.lua +++ b/spec/parse_encode_spec.lua @@ -34,6 +34,7 @@ describe("csv decode", function() local json = loadFile("spec/json/" .. value .. ".json") json = cjson.decode(json) local parse = ftcsv.parse("spec/csvs/" .. value .. ".csv", ",") + assert.are.same(#json, #parse) assert.are.same(json, parse) end) end