Added options to configure serialization process

This commit is contained in:
Paul Kulchenko 2012-06-12 23:17:44 -07:00
parent 29b8d35fd3
commit e89d8b8a9b
4 changed files with 119 additions and 35 deletions

View File

@ -8,10 +8,11 @@ Lua serializer and pretty printer.
* Provides single-line and multi-line output.
* Nested tables are properly indented in the multi-line output.
* Numerical keys are listed first.
* Keys are (optionally) sorted alphanumerically.
* Array part skips keys (`{'a', 'b'}` instead of `{[1] = 'a', [2] = 'b'}`).
* `nil` values are included when expected (`{1, nil, 3}` instead of `{1, [3]=3}`).
* Keys use short notation (`{foo = 'foo'}` instead of `{['foo'] = 'foo'}`).
* Shared and self-references are marked in the output.
* Shared references and self-references are marked in the output.
* Machine readable: provides reliable deserialization using `loadstring()`.
* Supports deeply nested tables.
* Supports tables with self-references.
@ -19,6 +20,7 @@ Lua serializer and pretty printer.
* Supports function serialization using `string.dump()`.
* Supports serialization of global functions.
* Escapes new-line `\010` and end-of-file control `\026` characters in strings.
* Configurable with options and custom formatters.
## Usage
@ -27,14 +29,67 @@ local serpent = require("serpent")
local a = {1, nil, 3, x=1, ['true'] = 2, [not true]=3}
a[a] = a -- self-reference with a table as key and value
print(serpent.serialize(a)) -- full serialization
print(serpent.printsing(a)) -- single line, no self-ref section
print(serpent.printmult(a)) -- multi-line indented, no self-ref section
print(serpent.dump(a)) -- full serialization
print(serpent.line(a)) -- single line, no self-ref section
print(serpent.block(a)) -- multi-line indented, no self-ref section
local fun, err = loadstring(serpent.serialize(a))
local fun, err = loadstring(serpent.dump(a))
if err then error(err) end
local copy = fun()
```
## Functions
Serpent provides three functions that are shortcuts to the same
internal function, but set different options by default:
* `dump(a[, {...}])` -- full serialization; sets `name`, `compact` and `sparse` options
* `line(a[, {...}])` -- single line, no self-ref section; sets `sortkeys` and `comment` options
* `block(a[, {...}])` -- multi-line indented, no self-ref section; sets `indent`, `sortkeys`, and `comment` options
## Options
* name (string) -- name; triggers full serialization with self-ref section
* indent (string) -- indentation; triggers long multi-line output
* comment (true/False) -- provide stringified value in a comment
* sortkeys (true/False) -- sort keys
* sparse (true/False) -- force sparse encoding (no nil filling based on #t)
* compact (true/False) -- remove spaces
* fatal (true/False) -- raise fatal error on non-serializable values
* nocode (true/False) -- disable bytecode serialization for easy comparison
* nohuge (true/False) -- disable checking numbers against undefined and huge values
* custom (function) -- provide custom output for tables
These options can be provided as a second parameter to Serpent functions.
```lua
block(a, {fatal = true})
line(a, {nocode = true})
function todiff(a) return dump(a, {nocode = true, indent = ' '}) end
```
## Formatters
Serpent supports a way to provide a custom formatter that allows you to fully
customize the output. For example, the following call will apply
`` `Foo{bar} `` notation to its output (used by Metalua to display ASTs):
```lua
print((require "serpent").block(ast, {comment = false, custom =
function(tag,head,body,tail)
local out = head..body..tail
if tag:find('^lineinfo') then
out = out:gsub("\n%s+", "") -- collapse lineinfo to one line
elseif tag == '' then
body = body:gsub('%s*lineinfo = [^\n]+', '')
local _,_,atag = body:find('tag = "(%w+)"%s*$')
if atag then
out = "`"..atag..head.. body:gsub('%s*tag = "%w+"%s*$', '')..tail
out = out:gsub("\n%s+", ""):gsub(",}","}")
else out = head..body..tail end
end
return tag..out
end}))
```
## Limitations
@ -60,8 +115,6 @@ strings (to address http://lua-users.org/lists/lua-l/2007-07/msg00362.html,
which is already fixed in Lua 5.2) and to check all numbers for `math.huge`.
The second number excludes this processing to put it on an equal footing
with other modules that skip these checks (`nucleo` still checks for `math.huge`).
There is no switch to disable this processing though as without it there is
no guarantee that the generated string is deserializable.
## Author
@ -70,3 +123,15 @@ Paul Kulchenko (paul@kulchenko.com)
## License
See LICENSE file.
## History
Jun 12 2012 v0.12
- Added options to configure serialization process.
- Added 'goto' to the list of keywords for Lua 5.2.
- Changed interface to dump/line/block methods.
- Changed 'math.huge' to 1/0 for better portability.
- Replaced \010 with \n for better readability.
Jun 03 2012 v0.10
- First public release.

View File

@ -1,5 +1,5 @@
local n, v, c, d = "serpent", 0.1, -- (C) 2012 Paul Kulchenko; MIT License
"Paul Kulchenko", "Serialization and pretty printing of Lua data types"
local n, v = "serpent", 0.12 -- (C) 2012 Paul Kulchenko; MIT License
local c, d = "Paul Kulchenko", "Serializer and pretty printer of Lua data types"
local snum = {[tostring(1/0)]='1/0 --[[math.huge]]',[tostring(-1/0)]='-1/0 --[[-math.huge]]',[tostring(0/0)]='0/0'}
local badtype = {thread = true, userdata = true}
local keyword, globals, G = {}, {}, (_G or _ENV)
@ -10,13 +10,16 @@ for k,v in pairs(G) do globals[v] = k end -- build func to name mapping
for _,g in ipairs({'coroutine', 'debug', 'io', 'math', 'string', 'table', 'os'}) do
for k,v in pairs(G[g]) do globals[v] = g..'.'..k end end
local function serialize(t, name, indent, fatal)
local function s(t, opts)
local name, indent, fatal = opts['name'], opts['indent'], opts['fatal']
local sparse, nocode, custom = opts['sparse'], opts['nocode'], opts['custom']
local huge, space = not opts['nohuge'], (opts['compact'] and '' or ' ')
local seen, sref = {}, {}
local function gensym(val) return tostring(val):gsub("[^%w]","") end
local function safestr(s) return type(s) == "number" and (snum[tostring(s)] or s)
local function safestr(s) return type(s) == "number" and (huge and snum[tostring(s)] or s)
or type(s) ~= "string" and tostring(s) -- escape NEWLINE/010 and EOF/026
or ("%q"):format(s):gsub("\010","n"):gsub("\026","\\026") end
local function comment(s) return ' --[['..tostring(s)..']]' end
local function comment(s) return opts['comment'] and ' --[['..tostring(s)..']]' or '' end
local function globerr(s) return globals[s] and globals[s]..comment(s) or not fatal
and safestr(tostring(s))..' --[[err]]' or error("Can't serialize "..tostring(s)) end
local function safename(path, name) -- generates foo.bar, foo[3], or foo['b a r']
@ -25,51 +28,62 @@ local function serialize(t, name, indent, fatal)
local safe = plain and n or '['..safestr(n)..']'
return (path or '')..(plain and path and '.' or '')..safe, safe
end
-- Sorts the key list `o` in place in "natural" alphanumeric order, so that
-- "item2" is placed before "item10".
-- o: array of keys; every key is compared through its tostring() form.
-- n: width to which each run of digits is zero-padded before comparing;
--    any value tonumber() cannot convert (e.g. `true`) selects the default 12.
local function alphanumsort(o, n)
  local maxn = math.floor(tonumber(n) or 12)
  -- Pad the digits as text instead of formatting them with "%d": converting a
  -- very long digit run to a number makes string.format raise an error on
  -- Lua 5.2+ (and may yield an arbitrary value on 5.1), which would break the
  -- comparator. Runs already wider than maxn are left unchanged.
  local function padnum(d) return ("0"):rep(maxn - #d)..d end
  -- The parentheses drop gsub's second result (the substitution count).
  local function sortkey(k) return (tostring(k):gsub("%d+", padnum)) end
  table.sort(o, function(a, b) return sortkey(a) < sortkey(b) end)
end
local function val2str(t, name, indent, path, plainindex, level)
local ttype, level = type(t), (level or 0)
local spath, sname = safename(path, name)
local tag = plainindex and ((type(name) == "number") and '' or name..' = ')
or (name ~= nil and sname..' = ' or '')
local tag = plainindex and
((type(name) == "number") and '' or name..space..'='..space) or
(name ~= nil and sname..space..'='..space or '')
if seen[t] then
table.insert(sref, spath..' = '..seen[t])
table.insert(sref, spath..space..'='..space..seen[t])
return tag..'nil --[[ref]]'
elseif badtype[ttype] then return tag..globerr(t)
elseif ttype == 'function' then
seen[t] = spath
local ok, res = pcall(string.dump, t)
local func = ok and "loadstring("..safestr(res)..",'@serialized')"..comment(t)
local func = ok and (nocode and "function()error('dummy')end" or
"loadstring("..safestr(res)..",'@serialized')"..comment(t))
return tag..(func or globerr(t))
elseif ttype == "table" then
seen[t] = spath
if next(t) == nil then return tag..'{}'..comment(t) end -- table empty
local maxn, o, out = #t, {}, {}
for key = 1, maxn do table.insert(o, key) end -- first array part
for key = 1, maxn do -- first array part
if t[key] or not sparse then table.insert(o, key) end end
for key in pairs(t) do -- then hash part (skip array keys up to maxn)
if type(key) ~= "number" or key > maxn then
table.insert(o, key) end end
if type(key) ~= "number" or key > maxn then table.insert(o, key) end end
if opts['sortkeys'] then alphanumsort(o, opts['sortkeys']) end
for n, key in ipairs(o) do
local value, ktype, plainindex = t[key], type(key), n <= maxn
local value, ktype, plainindex = t[key], type(key), n <= maxn and not sparse
if badtype[ktype] then plainindex, key = true, '['..globerr(key)..']' end
if ktype == 'table' or ktype == 'function' then
if not seen[key] and not globals[key] then
table.insert(sref, 'local '..val2str(key,gensym(key),indent)) end
table.insert(sref, seen[t]..'['..(seen[key] or globals[key] or gensym(key))
..'] = '..(seen[value] or val2str(value,nil,indent)))
..']'..space..'='..space..(seen[value] or val2str(value,nil,indent)))
else table.insert(out,val2str(value,key,indent,spath,plainindex,level+1)) end
end
local prefix = string.rep(indent or '', level)
return tag..(indent and '{\n'..prefix..indent or '{')..
table.concat(out, indent and ',\n'..prefix..indent or ', ')..
(indent and "\n"..prefix..'}' or '}')..comment(t)
local head = indent and '{\n'..prefix..indent or '{'
local body = table.concat(out, ','..(indent and '\n'..prefix..indent or space))
local tail = indent and "\n"..prefix..'}' or '}'
return (custom and custom(tag,head,body,tail) or tag..head..body..tail)..comment(t)
else return tag..safestr(t) end -- handle all other types
end
local sepr = indent and "\n" or "; "
local sepr = indent and "\n" or ";"..space
local body = val2str(t, name, indent) -- this call also populates sref
local tail = #sref>0 and table.concat(sref, sepr)..sepr or ''
return not name and body or "do local "..body..sepr..tail.."return "..name..sepr.."end"
end
return { _NAME = n, _COPYRIGHT = c, _DESCRIPTION = d, _VERSION = v,
serialize = function(t,n,i,f) return serialize(t,n or '_',i,f) end,
printmult = function(t,i) return serialize(t,nil,i or ' ') end,
printsing = function(t) return serialize(t) end }
-- Copies every key/value pair of `b` (when `b` is given) into `a`, overwriting
-- keys that already exist, and returns `a` itself; `a` is modified in place.
local function merge(a, b)
  for key, value in pairs(b or {}) do
    a[key] = value
  end
  return a
end
-- Module table: metadata fields plus the public entry points.
-- `serialize` exposes the internal serializer `s` directly, so it must be
-- given an options table. The three shortcuts below call `s` with preset
-- options; keys in the caller-supplied `opts` overwrite those presets.
return { _NAME = n, _COPYRIGHT = c, _DESCRIPTION = d, _VERSION = v, serialize = s,
-- dump: presets name = '_', compact and sparse
dump = function(a, opts) return s(a, merge({name = '_', compact = true, sparse = true}, opts)) end,
-- line: presets sortkeys and comment; no indent, so output stays on one line
line = function(a, opts) return s(a, merge({sortkeys = true, comment = true}, opts)) end,
-- block: same presets as line plus an indent string
block = function(a, opts) return s(a, merge({indent = ' ', sortkeys = true, comment = true}, opts)) end }

View File

@ -1,6 +1,6 @@
local ITERS = 1000
local TESTS = {
serpent = function() return require("serpent").serialize end,
serpent = function() return require("serpent").dump end,
penlight = function() return require("pl.pretty").write end,
metalua = function() require("serialize"); return (_G or _ENV).serialize end,
nucleo = function()

View File

@ -1,5 +1,5 @@
local serpent = require("serpent")
local serialize = serpent.serialize
local serialize = serpent.dump
--[[ Penlight
local serialize = require("pl.pretty").write --]]
@ -26,9 +26,9 @@ local a = {
a.c = a -- self-reference
a[a] = a -- self-reference with table as key
print("pretty: " .. serpent.printmult(a) .. "\n") -- serialize(a, nil, ' ')
print("line: " .. serpent.printsing(a) .. "\n") -- serialize(a, nil)
local str = serpent.serialize(a, 'a')
print("pretty: " .. serpent.block(a) .. "\n")
print("line: " .. serpent.line(a) .. "\n")
local str = serpent.dump(a)
print("full: " .. str .. "\n")
local fun, err = loadstring(str)
@ -45,5 +45,10 @@ assert(tostring(_a[_b]) == tostring(0/0), "table as key and undefined value: fai
assert(_a[math.huge] == -math.huge, "math.huge as key and value: failed")
assert(_a[io.stdin] == 3, "io.stdin as key: failed")
assert(_a[_c] == print, "shared function as key and global function as value: failed")
assert(#(_a.list) == #(a.list), "size of array part stays the same: failed")
assert(#(_a.list[7]) == 0, "empty table stays empty: failed")
assert(_a.list[4] == 'f', "specific table element preserves its value: failed")
-- test without sparseness to check the number of elements in the list with nil
_a = loadstring(serpent.dump(a, {sparse = false, nocode = true}))()
assert(pcall(function() _a.z() end) == false, "nocode replaces functions with dummy errors: failed")
assert(#(_a.list) == #(a.list), "size of array part stays the same: failed")