-- Documentation for this module may be created at Module:Sandbox/Nardog/7es/doc
local p = {}

-- Grapheme-to-phoneme table for Spanish spelling.
-- Keys are lower-case spellings of one to three characters; values are the
-- base phonetic symbols they map to. Voiced obstruents (b/v, d, g) are stored
-- as fricatives and accented vowels keep their accent mark.
local g2p = {
	-- Separators. "#" is passed through here and removed later.
	[" "] = " ", ["#"] = "#",

	-- Vowels, plain and accented.
	["a"] = "a", ["e"] = "e", ["i"] = "i", ["o"] = "o", ["u"] = "u",
	["á"] = "á", ["é"] = "é", ["í"] = "í", ["ó"] = "ó", ["ú"] = "ú",

	-- Single-letter consonants.
	["b"] = "β", ["c"] = "k", ["d"] = "ð", ["f"] = "f", ["g"] = "ɣ",
	["j"] = "x", ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n",
	["ñ"] = "ɲ", ["p"] = "p", ["r"] = "ɾ", ["s"] = "s", ["t"] = "t",
	["v"] = "β", ["w"] = "w", ["x"] = "ks", ["y"] = "ʝ", ["z"] = "θ",

	-- "c" before a front vowel.
	["ce"] = "θe", ["cé"] = "θé", ["ci"] = "θi", ["cí"] = "θí",

	-- "g" before a front vowel, and "gu"/"gü" spellings.
	["ge"] = "xe", ["gé"] = "xé", ["gi"] = "xi", ["gí"] = "xí",
	["gue"] = "ɣe", ["gué"] = "ɣé", ["gui"] = "ɣi", ["guí"] = "ɣí",
	["gü"] = "ɣu",

	-- Other digraphs.
	["ch"] = "tʃ", ["ll"] = "ʎ", ["qu"] = "k", ["rr"] = "r",
	["tx"] = "tʃ", ["tz"] = "ts",
}
--- Transcribe Spanish text into a phonetic (IPA) string.
--
-- The input is lower-cased and trimmed, split into graphemes using the `g2p`
-- table (longest spelling first), and then rewritten by an ordered series of
-- pattern substitutions. The order of the substitutions matters: later rules
-- rely on the output of earlier ones.
--
-- @param args table of arguments:
--   args[1]      the text to transcribe; a missing value is treated as empty.
--                "#" may be used in the text as a boundary marker; it is
--                honoured by the rules below and stripped from the result.
--   args.seseo   when equal to 'yes', every "θ" is replaced with "s".
--   args.yeismo  when equal to 'yes', every "ʎ" is replaced with "ʝ".
-- @return the transcription as a string (empty for empty input).
function p.main(args)
	local gsub = mw.ustring.gsub
	local find = mw.ustring.find

	-- A missing first argument is treated as empty input instead of raising
	-- an error inside mw.text.trim.
	local text = mw.ustring.lower(mw.text.trim(args[1] or ''))
	local chars = mw.text.split(text, '') -- one entry per character

	-- Length (in characters) of the longest spelling known to g2p.
	local maxLen = 1
	for spelling in pairs(g2p) do
		local len = mw.ustring.len(spelling)
		if len > maxLen then
			maxLen = len
		end
	end

	-- Greedy longest-match conversion. Characters that start no known
	-- spelling produce no output and are skipped.
	local pieces = {}
	local i = 1
	while i <= #chars do
		for j = maxLen - 1, 0, -1 do -- try the longest candidate first
			if chars[i + j] then
				local phones = g2p[table.concat(chars, '', i, i + j)]
				if phones then
					pieces[#pieces + 1] = phones
					i = i + j -- skip the characters just consumed
					break
				end
			end
		end
		i = i + 1
	end

	local ret = table.concat(pieces)
	ret = gsub(ret, ' +', ' ')
	if args.seseo == 'yes' then
		ret = gsub(ret, 'θ', 's')
	end
	if args.yeismo == 'yes' then
		ret = gsub(ret, 'ʎ', 'ʝ')
	end

	-- no geminates: collapse a doubled non-vowel into one
	ret = gsub(ret, '([^#aeiouáéíóú])%1', '%1')

	-- x: initial "ks" becomes "s". The boundary character is captured and
	-- re-emitted so that neighbouring words are not merged.
	ret = gsub(ret, '^ks', 's')
	ret = gsub(ret, '([ #])ks', '%1s')

	-- voice
	ret = gsub(ret, '([aeiouáéíóú][^ #aeiouáéíóú]?)s([ #]?[βðɣʝlʎmnrɾv])', '%1z%2')
	ret = gsub(ret, '([aeiouáéíóú])f([ #]?[βðɣʝʎmnz])', '%1v%2')

	-- word-internal coda
	ret = gsub(ret, '([aeiouáéíóú])p([^ #aeiouáéíóú][^ #])', '%1β%2')
	ret = gsub(ret, '([aeiouáéíóú])t([^ #aeiouáéíóú][^ #])', '%1ð%2')
	ret = gsub(ret, '([aeiouáéíóú])k([^ #aeiouáéíóú][^ #])', '%1ɣ%2')

	-- vowel: unaccented i/u next to another vowel become glides
	ret = gsub(ret, 'i([aeoáéóu])', 'j%1')
	ret = gsub(ret, 'u([aeoáéói])', 'w%1')
	ret = gsub(ret, '([aeoáéó])i', '%1j')
	ret = gsub(ret, '([aeoáéó])u', '%1w')

	-- plosive: β ð ɣ become b d g at the start of the string and after
	-- a nasal (also after l for ð)
	ret = gsub(ret, '^β', 'b')
	ret = gsub(ret, '([mn][ #]?)β', '%1b')
	ret = gsub(ret, '^ð', 'd')
	ret = gsub(ret, '([lmn][ #]?)ð', '%1d')
	ret = gsub(ret, '^ɣ', 'g')
	ret = gsub(ret, '([mn][ #]?)ɣ', '%1g')

	-- palatal
	ret = gsub(ret, '^[jʝ]', 'ɟʝ')
	-- The boundary character is kept here as well (see the "x" rule above).
	ret = gsub(ret, '([ #])j', '%1ʝ')
	ret = gsub(ret, '([lmn][ #]?)ʝ', '%1ɟʝ')
	ret = gsub(ret, 'ʝ([^aeiouáéíóújw])', 'j%1')
	ret = gsub(ret, 'ʝ$', 'j')

	-- nasal: place assimilation to the following consonant
	ret = gsub(ret, 'm$', 'n')
	ret = gsub(ret, 'n([ #]?[bpm])', 'm%1')
	ret = gsub(ret, '[mn]([ #]?f)', 'ɱ%1')
	ret = gsub(ret, 'm([ #]?[dlnrɾtθs])', 'n%1')
	ret = gsub(ret, '[mn]([ #]?[ɟɲʎ])', 'ɲ%1')
	ret = gsub(ret, '[mn]([ #]?[gkx])', 'ŋ%1')

	-- vibrant
	ret = gsub(ret, '^ɾ', 'r')
	ret = gsub(ret, '([ #lmns])ɾ', '%1r')

	-- stress, decided word by word
	local accented = {
		["á"] = "ˈa", ["é"] = "ˈe", ["í"] = "ˈi", ["ó"] = "ˈo",
		["ú"] = "ˈu",
	}
	local words = {}
	for word in mw.ustring.gmatch(ret, '[^ ]+') do
		if find(word, '[áéíóú]') then
			-- an written accent marks the stressed vowel directly
			word = gsub(word, '[áéíóú]', accented)
		elseif find(word, '[aeiou].-[aeioumnɲŋɱs]$') then -- penultimate
			-- "ɱ" is listed with the other nasals because the nasal rules
			-- above may have turned a word-final n into it.
			word = gsub(word, '([aeiou][^#aeiou]-[aeiou][^#aeiou]?[mnɲŋs]?)$', 'ˈ%1')
		else -- final
			word = gsub(word, '([aeiou][^#aeiou]-)$', 'ˈ%1')
		end

		-- Move the stress mark from the vowel to the start of its syllable.
		if find(word, '^[^aeiou]-ˈ') then -- word-initial
			word = gsub(word, '^(.-)ˈ', 'ˈ%1')
		elseif find(word, '[ptkbβdgɣf][jwlrɾ]-ˈ') then -- complex onset
			word = gsub(word, '([ptkbβdgɣf][jwlrɾ]-)ˈ', 'ˈ%1')
		elseif find(word, '[^#aeiou]ˈ') then -- simple onset
			word = gsub(word, '([^#aeiou][jw]?)ˈ', 'ˈ%1')
		end
		words[#words + 1] = word
	end

	ret = table.concat(words, ' ')
	ret = gsub(ret, 'g', 'ɡ') -- ASCII g to the IPA letter ɡ (U+0261)
	ret = gsub(ret, '#', '') -- boundary markers are no longer needed
	return ret
end
-- Return the module table (holding p.main) as the result of loading this module.
return p