-- Documentation for this module may be created at Module:Sandbox/Cousteau/strsubst/doc

local p = {}


-- Usage: {{#invoke:<this>|char|<string>|<c1>|<replacement1>|<c2>|<replacement2>[|sep=<separator>]}}
--   string: string to parse (a missing string is treated as empty)
--   c1, c2...: single characters (a multi-byte UTF-8 character counts as one)
--   replacement1, replacement2...: what those characters are replaced by
--   separator: separator inserted between consecutive replacements (default: none)
-- An empty key stands for a space. If a key is repeated, the last replacement wins.
-- A trailing key that has no replacement is not recognized.
-- Unknown characters are ignored and may be used for spacing.
-- Example: {{#invoke:<this>|char|.--.|.|dit|-|daah|sep=-}} --> dit-daah-daah-dit
function p.char(frame)
    local args = frame.args
    local sep = args.sep or ''

    -- Extract key-value pairs.
    -- NB: #frame.args doesn't work, so walk the positional arguments two at
    -- a time (key, replacement) until a key is missing.
    local replacements = {}
    local i = 2
    while args[i] ~= nil do
        local key = args[i]
        if key == '' then key = ' ' end -- assume empty = space (but be careful with those)
        replacements[key] = args[i + 1]
        i = i + 2
    end

    -- Parse string: collect the replacement of every recognized character.
    -- The pattern yields one UTF-8 character per step (a lead byte followed by
    -- its continuation bytes), so non-ASCII keys can match too. For pure ASCII
    -- input this is the same as going byte by byte.
    -- NOTE(review): assumes the arguments are valid UTF-8.
    local str = args[1] or ''
    local pieces = {}
    for c in str:gmatch('[%z\1-\127\194-\244][\128-\191]*') do
        local replacement = replacements[c]
        if replacement ~= nil then -- if c is a recognized character
            pieces[#pieces + 1] = replacement
        end
    end

    -- table.concat puts sep between the pieces only (none before the first)
    return table.concat(pieces, sep)
end


-- Usage: {{#invoke:<this>|str|<string>|<key1>|<replacement1>|<key2>|<replacement2>[|sep=separator]}}
--   string: string to parse (a missing string is treated as empty)
--   key1, key2...: substrings to replace (one or more characters)
--   replacement1, replacement2...: what those substrings are replaced by
--   separator: separator inserted between consecutive replacements (default: none)
-- An empty key stands for a space. If a key is repeated, the last replacement wins.
-- A trailing key that has no replacement is not recognized.
-- If more than one substring matches, the longest one is used.
-- If no substring matches, one byte is chomped; therefore unknown characters may be used for spacing.
-- This function might be more expensive than the .char() version.
-- Example: {{#invoke:<this>|str|dit daah daah dit|dit|.|daah|-|sep=/}} --> ./-/-/.
function p.str(frame)
    local args = frame.args
    local sep = args.sep or ''

    -- Extract key-value pairs.
    -- NB: #frame.args doesn't work, so walk the positional arguments two at
    -- a time (key, replacement) until a key is missing. Storing them in a map
    -- makes a repeated key resolve to its last replacement and drops a key
    -- whose replacement is missing (assigning nil stores nothing).
    local replacements = {}
    local i = 2
    while args[i] ~= nil do
        local key = args[i]
        if key == '' then key = ' ' end -- assume empty = space (but be careful with those)
        replacements[key] = args[i + 1]
        i = i + 2
    end

    -- Sort the distinct keys by length so that longest strings are tried first.
    -- The order among equal-length keys is irrelevant: two different keys of
    -- the same length cannot both match at the same position.
    local keys = {}
    for key in pairs(replacements) do
        keys[#keys + 1] = key
    end
    table.sort(keys, function(a, b) return #a > #b end)

    -- Parse string: at each position take the longest key that matches there.
    -- A moving index is used instead of repeatedly re-slicing the string.
    local str = args[1] or ''
    local pieces = {}
    local pos, len = 1, #str
    while pos <= len do
        local step = 1 -- nothing matched: discard one byte and try again
        for _, key in ipairs(keys) do
            if str:sub(pos, pos + #key - 1) == key then -- str continues with this key
                pieces[#pieces + 1] = replacements[key]
                step = #key -- keys are never empty, so this always advances
                break
            end
        end
        pos = pos + step
    end

    -- table.concat puts sep between the pieces only (none before the first)
    return table.concat(pieces, sep)
end


-- [TEST] Dump the arguments passed: one ' [key]="value"' entry per line,
-- in whatever order pairs() visits them
function p.test(frame)
    local lines = {}
    for name, value in pairs(frame.args) do
        lines[#lines + 1] = ' [' .. name .. ']="' .. value .. '"\n'
    end
    return table.concat(lines)
end


return p