From e89d8b8a9b6fd14dcf3281d66cd59100dd3f066b Mon Sep 17 00:00:00 2001 From: Paul Kulchenko Date: Tue, 12 Jun 2012 23:17:44 -0700 Subject: [PATCH] Added options to configure serialization process --- README.md | 79 ++++++++++++++++++++++++++++++++++++++++++++----- src/serpent.lua | 58 ++++++++++++++++++++++-------------- t/bench.lua | 2 +- t/test.lua | 15 ++++++---- 4 files changed, 119 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 0cdd1a1..38a229d 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,11 @@ Lua serializer and pretty printer. * Provides single-line and multi-line output. * Nested tables are properly indented in the multi-line output. * Numerical keys are listed first. + * Keys are (optionally) sorted alphanumerically. * Array part skips keys (`{'a', 'b'}` instead of `{[1] = 'a', [2] = 'b'}`). * `nil` values are included when expected (`{1, nil, 3}` instead of `{1, [3]=3}`). * Keys use short notation (`{foo = 'foo'}` instead of `{['foo'] = 'foo'}`). - * Shared and self-references are marked in the output. + * Shared references and self-references are marked in the output. * Machine readable: provides reliable deserialization using `loadstring()`. * Supports deeply nested tables. * Supports tables with self-references. @@ -19,6 +20,7 @@ Lua serializer and pretty printer. * Supports function serialization using `string.dump()`. * Supports serialization of global functions. * Escapes new-line `\010` and end-of-file control `\026` characters in strings. +* Configurable with options and custom formatters. 
## Usage @@ -27,14 +29,67 @@ local serpent = require("serpent") local a = {1, nil, 3, x=1, ['true'] = 2, [not true]=3} a[a] = a -- self-reference with a table as key and value -print(serpent.serialize(a)) -- full serialization -print(serpent.printsing(a)) -- single line, no self-ref section -print(serpent.printmult(a)) -- multi-line indented, no self-ref section +print(serpent.dump(a)) -- full serialization +print(serpent.line(a)) -- single line, no self-ref section +print(serpent.block(a)) -- multi-line indented, no self-ref section -local fun, err = loadstring(serpent.serialize(a)) +local fun, err = loadstring(serpent.dump(a)) if err then error(err) end local copy = fun() +``` +## Functions + +Serpent provides three functions that are shortcuts to the same +internal function, but set different options by default: + +* `dump(a[, {...}])` -- full serialization; sets `name`, `compact` and `sparse` options +* `line(a[, {...}])` -- single line, no self-ref section; sets `sortkeys` and `comment` options +* `block(a[, {...}])` -- multi-line indented, no self-ref section; sets `indent`, `sortkeys`, and `comment` options + +## Options + +* name (string) -- name; triggers full serialization with self-ref section +* indent (string) -- indentation; triggers long multi-line output +* comment (true/false) -- provide stringified value in a comment +* sortkeys (true/false) -- sort keys +* sparse (true/false) -- force sparse encoding (no nil filling based on #t) +* compact (true/false) -- remove spaces +* fatal (true/false) -- raise fatal error on non-serializable values +* nocode (true/false) -- disable bytecode serialization for easy comparison +* nohuge (true/false) -- disable checking numbers against undefined and huge values +* custom (function) -- provide custom output for tables + +These options can be provided as a second parameter to Serpent functions. 
+ +```lua +block(a, {fatal = true}) +line(a, {nocode = true}) +function todiff(a) return dump(a, {nocode = true, indent = ' '}) end +``` + +## Formatters + +Serpent supports a way to provide a custom formatter that allows you to fully +customize the output. For example, the following call will apply +`Foo{bar} notation to its output (used by Metalua to display ASTs): + +```lua +print((require "serpent").block(ast, {comment = false, custom = + function(tag,head,body,tail) + local out = head..body..tail + if tag:find('^lineinfo') then + out = out:gsub("\n%s+", "") -- collapse lineinfo to one line + elseif tag == '' then + body = body:gsub('%s*lineinfo = [^\n]+', '') + local _,_,atag = body:find('tag = "(%w+)"%s*$') + if atag then + out = "`"..atag..head.. body:gsub('%s*tag = "%w+"%s*$', '')..tail + out = out:gsub("\n%s+", ""):gsub(",}","}") + else out = head..body..tail end + end + return tag..out + end})) ``` ## Limitations @@ -60,8 +115,6 @@ strings (to address http://lua-users.org/lists/lua-l/2007-07/msg00362.html, which is already fixed in Lua 5.2) and to check all numbers for `math.huge`. The seconds number excludes this processing to put it on an equal footing with other modules that skip these checks (`nucleo` still checks for `math.huge`). -There is no switch to disable this processing though as without it there is -no guarantee that the generated string is deserializable. ## Author @@ -70,3 +123,15 @@ Paul Kulchenko (paul@kulchenko.com) ## License See LICENSE file. + +## History + +Jun 12 2012 v0.12 + - Added options to configure serialization process. + - Added 'goto' to the list of keywords for Lua 5.2. + - Changed interface to dump/line/block methods. + - Changed 'math.huge' to 1/0 for better portability. + - Replaced \010 with \n for better readability. + +Jun 03 2012 v0.10 + - First public release. 
diff --git a/src/serpent.lua b/src/serpent.lua index fb00df2..952bc56 100644 --- a/src/serpent.lua +++ b/src/serpent.lua @@ -1,5 +1,5 @@ -local n, v, c, d = "serpent", 0.1, -- (C) 2012 Paul Kulchenko; MIT License - "Paul Kulchenko", "Serialization and pretty printing of Lua data types" +local n, v = "serpent", 0.12 -- (C) 2012 Paul Kulchenko; MIT License +local c, d = "Paul Kulchenko", "Serializer and pretty printer of Lua data types" local snum = {[tostring(1/0)]='1/0 --[[math.huge]]',[tostring(-1/0)]='-1/0 --[[-math.huge]]',[tostring(0/0)]='0/0'} local badtype = {thread = true, userdata = true} local keyword, globals, G = {}, {}, (_G or _ENV) @@ -10,13 +10,16 @@ for k,v in pairs(G) do globals[v] = k end -- build func to name mapping for _,g in ipairs({'coroutine', 'debug', 'io', 'math', 'string', 'table', 'os'}) do for k,v in pairs(G[g]) do globals[v] = g..'.'..k end end -local function serialize(t, name, indent, fatal) +local function s(t, opts) + local name, indent, fatal = opts['name'], opts['indent'], opts['fatal'] + local sparse, nocode, custom = opts['sparse'], opts['nocode'], opts['custom'] + local huge, space = not opts['nohuge'], (opts['compact'] and '' or ' ') local seen, sref = {}, {} local function gensym(val) return tostring(val):gsub("[^%w]","") end - local function safestr(s) return type(s) == "number" and (snum[tostring(s)] or s) + local function safestr(s) return type(s) == "number" and (huge and snum[tostring(s)] or s) or type(s) ~= "string" and tostring(s) -- escape NEWLINE/010 and EOF/026 or ("%q"):format(s):gsub("\010","n"):gsub("\026","\\026") end - local function comment(s) return ' --[['..tostring(s)..']]' end + local function comment(s) return opts['comment'] and ' --[['..tostring(s)..']]' or '' end local function globerr(s) return globals[s] and globals[s]..comment(s) or not fatal and safestr(tostring(s))..' 
--[[err]]' or error("Can't serialize "..tostring(s)) end local function safename(path, name) -- generates foo.bar, foo[3], or foo['b a r'] @@ -25,51 +28,62 @@ local function serialize(t, name, indent, fatal) local safe = plain and n or '['..safestr(n)..']' return (path or '')..(plain and path and '.' or '')..safe, safe end + local function alphanumsort(o, n) + local maxn = tonumber(n) or 12 + local function padnum(d) return ("%0"..maxn.."d"):format(d) end + table.sort(o, function(a,b) + return tostring(a):gsub("%d+",padnum) < tostring(b):gsub("%d+",padnum) end) + end local function val2str(t, name, indent, path, plainindex, level) local ttype, level = type(t), (level or 0) local spath, sname = safename(path, name) - local tag = plainindex and ((type(name) == "number") and '' or name..' = ') - or (name ~= nil and sname..' = ' or '') + local tag = plainindex and + ((type(name) == "number") and '' or name..space..'='..space) or + (name ~= nil and sname..space..'='..space or '') if seen[t] then - table.insert(sref, spath..' 
= '..seen[t]) + table.insert(sref, spath..space..'='..space..seen[t]) return tag..'nil --[[ref]]' elseif badtype[ttype] then return tag..globerr(t) elseif ttype == 'function' then seen[t] = spath local ok, res = pcall(string.dump, t) - local func = ok and "loadstring("..safestr(res)..",'@serialized')"..comment(t) + local func = ok and (nocode and "function()error('dummy')end" or + "loadstring("..safestr(res)..",'@serialized')"..comment(t)) return tag..(func or globerr(t)) elseif ttype == "table" then seen[t] = spath if next(t) == nil then return tag..'{}'..comment(t) end -- table empty local maxn, o, out = #t, {}, {} - for key = 1, maxn do table.insert(o, key) end -- first array part + for key = 1, maxn do -- first array part + if t[key] or not sparse then table.insert(o, key) end end for key in pairs(t) do -- then hash part (skip array keys up to maxn) - if type(key) ~= "number" or key > maxn then - table.insert(o, key) end end + if type(key) ~= "number" or key > maxn then table.insert(o, key) end end + if opts['sortkeys'] then alphanumsort(o, opts['sortkeys']) end for n, key in ipairs(o) do - local value, ktype, plainindex = t[key], type(key), n <= maxn + local value, ktype, plainindex = t[key], type(key), n <= maxn and not sparse if badtype[ktype] then plainindex, key = true, '['..globerr(key)..']' end if ktype == 'table' or ktype == 'function' then if not seen[key] and not globals[key] then table.insert(sref, 'local '..val2str(key,gensym(key),indent)) end table.insert(sref, seen[t]..'['..(seen[key] or globals[key] or gensym(key)) - ..'] = '..(seen[value] or val2str(value,nil,indent))) + ..']'..space..'='..space..(seen[value] or val2str(value,nil,indent))) else table.insert(out,val2str(value,key,indent,spath,plainindex,level+1)) end end local prefix = string.rep(indent or '', level) - return tag..(indent and '{\n'..prefix..indent or '{').. - table.concat(out, indent and ',\n'..prefix..indent or ', ').. 
- (indent and "\n"..prefix..'}' or '}')..comment(t) + local head = indent and '{\n'..prefix..indent or '{' + local body = table.concat(out, ','..(indent and '\n'..prefix..indent or space)) + local tail = indent and "\n"..prefix..'}' or '}' + return (custom and custom(tag,head,body,tail) or tag..head..body..tail)..comment(t) else return tag..safestr(t) end -- handle all other types end - local sepr = indent and "\n" or "; " + local sepr = indent and "\n" or ";"..space local body = val2str(t, name, indent) -- this call also populates sref local tail = #sref>0 and table.concat(sref, sepr)..sepr or '' return not name and body or "do local "..body..sepr..tail.."return "..name..sepr.."end" end -return { _NAME = n, _COPYRIGHT = c, _DESCRIPTION = d, _VERSION = v, - serialize = function(t,n,i,f) return serialize(t,n or '_',i,f) end, - printmult = function(t,i) return serialize(t,nil,i or ' ') end, - printsing = function(t) return serialize(t) end } \ No newline at end of file +local function merge(a, b) if b then for k,v in pairs(b) do a[k] = v end end; return a; end +return { _NAME = n, _COPYRIGHT = c, _DESCRIPTION = d, _VERSION = v, serialize = s, + dump = function(a, opts) return s(a, merge({name = '_', compact = true, sparse = true}, opts)) end, + line = function(a, opts) return s(a, merge({sortkeys = true, comment = true}, opts)) end, + block = function(a, opts) return s(a, merge({indent = ' ', sortkeys = true, comment = true}, opts)) end } \ No newline at end of file diff --git a/t/bench.lua b/t/bench.lua index 41e4455..77bffe3 100644 --- a/t/bench.lua +++ b/t/bench.lua @@ -1,6 +1,6 @@ local ITERS = 1000 local TESTS = { - serpent = function() return require("serpent").serialize end, + serpent = function() return require("serpent").dump end, penlight = function() return require("pl.pretty").write end, metalua = function() require("serialize"); return (_G or _ENV).serialize end, nucleo = function() diff --git a/t/test.lua b/t/test.lua index a6efe64..ac8752f 100644 --- 
a/t/test.lua +++ b/t/test.lua @@ -1,5 +1,5 @@ local serpent = require("serpent") -local serialize = serpent.serialize +local serialize = serpent.dump --[[ Penlight local serialize = require("pl.pretty").write --]] @@ -26,9 +26,9 @@ local a = { a.c = a -- self-reference a[a] = a -- self-reference with table as key -print("pretty: " .. serpent.printmult(a) .. "\n") -- serialize(a, nil, ' ') -print("line: " .. serpent.printsing(a) .. "\n") -- serialize(a, nil) -local str = serpent.serialize(a, 'a') +print("pretty: " .. serpent.block(a) .. "\n") +print("line: " .. serpent.line(a) .. "\n") +local str = serpent.dump(a) print("full: " .. str .. "\n") local fun, err = loadstring(str) @@ -45,5 +45,10 @@ assert(tostring(_a[_b]) == tostring(0/0), "table as key and undefined value: fai assert(_a[math.huge] == -math.huge, "math.huge as key and value: failed") assert(_a[io.stdin] == 3, "io.stdin as key: failed") assert(_a[_c] == print, "shared function as key and global function as value: failed") -assert(#(_a.list) == #(a.list), "size of array part stays the same: failed") assert(#(_a.list[7]) == 0, "empty table stays empty: failed") +assert(_a.list[4] == 'f', "specific table element preserves its value: failed") + +-- test without sparseness to check the number of elements in the list with nil +_a = loadstring(serpent.dump(a, {sparse = false, nocode = true}))() +assert(pcall(function() _a.z() end) == false, "nocode replaces functions with dummy errors: failed") +assert(#(_a.list) == #(a.list), "size of array part stays the same: failed")