-- Documentation for this module may be created at Module:Sandbox/Nardog/7es/doc

-- Table of functions exported by this module.
local p = {}

-- Grapheme-to-phoneme lookup: maps a lowercase spelling sequence (one to three
-- characters long) to its initial transcription. p.main tries the longest
-- sequence first at each position, so multi-character keys such as "gue" or
-- "ch" take precedence over their single-letter prefixes. The values are only
-- a starting point: p.main rewrites many of them afterwards depending on
-- context. Characters with no entry here (e.g. "h") produce no output.
local g2p = {
	[" "] = " ",
	["#"] = "#", -- passed through as a marker; p.main strips it at the very end
	["a"] = "a",
	["á"] = "á", -- accented vowels are kept so p.main can place the stress mark
	["b"] = "β",
	["c"] = "k",
	["ce"] = "θe",
	["cé"] = "θé",
	["ci"] = "θi",
	["cí"] = "θí",
	["ch"] = "tʃ",
	["d"] = "ð",
	["e"] = "e",
	["é"] = "é",
	["f"] = "f",
	["g"] = "ɣ",
	["ge"] = "xe",
	["gé"] = "xé",
	["gi"] = "xi",
	["gí"] = "xí",
	["gue"] = "ɣe",
	["gué"] = "ɣé",
	["gui"] = "ɣi",
	["guí"] = "ɣí",
	["gü"] = "ɣu",
	["i"] = "i",
	["í"] = "í",
	["j"] = "x",
	["k"] = "k",
	["l"] = "l",
	["ll"] = "ʎ", -- p.main turns this into "ʝ" when args.yeismo == 'yes'
	["m"] = "m",
	["n"] = "n",
	["ñ"] = "ɲ",
	["o"] = "o",
	["ó"] = "ó",
	["p"] = "p",
	["qu"] = "k",
	["r"] = "ɾ",
	["rr"] = "r",
	["s"] = "s",
	["t"] = "t",
	["tx"] = "tʃ",
	["tz"] = "ts",
	["u"] = "u",
	["ú"] = "ú",
	["v"] = "β",
	["w"] = "w",
	["x"] = "ks", -- p.main reduces this to "s" at the start of the input or after a space/"#"
	["y"] = "ʝ",
	["z"] = "θ", -- p.main turns "θ" into "s" when args.seseo == 'yes'
}

-- Transcribes Spanish orthography into an IPA-style string.
--
-- The input is trimmed and lowercased, tokenised greedily against g2p (longest
-- grapheme first; characters with no g2p entry are dropped), and the result is
-- then refined by an ordered series of contextual substitutions. The order of
-- those substitutions matters: later rules operate on the output of earlier
-- ones. In the rules, a space or '#' is treated as a boundary.
--
-- @param args table of arguments:
--   args[1]      the text to transcribe (a missing value is treated as '')
--   args.seseo   'yes' to replace θ with s
--   args.yeismo  'yes' to replace ʎ with ʝ
-- @return the transcription as a single string, with stress marks (ˈ)
--   inserted per word and all '#' markers removed
function p.main(args)
	local gsub, find = mw.ustring.gsub, mw.ustring.find

	-- A missing first argument is transcribed as the empty string instead of
	-- raising an error inside mw.text.trim.
	local s = mw.ustring.lower(mw.text.trim(args[1] or ''))
	s = mw.text.split(s, '') -- now a table of single characters

	local ret = {}
	do
		-- Length (in characters) of the longest grapheme key in g2p.
		local maxLen = 1
		for k, _ in pairs(g2p) do
			local len = mw.ustring.len(k)
			if len > maxLen then
				maxLen = len
			end
		end
		local i = 1
		repeat
			for j = maxLen - 1, 0, -1 do -- count down: longest candidate first
				if s[i + j] then
					local graph = table.concat(s, '', i, i + j)
					if g2p[graph] then
						table.insert(ret, g2p[graph])
						i = i + j -- skip the characters just consumed
						break
					end
				end
			end
			i = i + 1
		until i > #s
	end
	ret = table.concat(ret)

	-- collapse runs of spaces
	ret = gsub(ret, '  +', ' ')

	if args.seseo == 'yes' then
		ret = gsub(ret, 'θ', 's')
	end
	if args.yeismo == 'yes' then
		ret = gsub(ret, 'ʎ', 'ʝ')
	end

	-- no geminates
	ret = gsub(ret, '([^#aeiouáéíóú])%1', '%1')

	-- x: initial "ks" is reduced to "s". The boundary character is captured
	-- and re-emitted so that the preceding word is not glued onto this one.
	ret = gsub(ret, '^ks', 's')
	ret = gsub(ret, '([ #])ks', '%1s')

	-- voice
	ret = gsub(ret, '([aeiouáéíóú][^ #aeiouáéíóú]?)s([ #]?[βðɣʝlʎmnrɾv])', '%1z%2')
	ret = gsub(ret, '([aeiouáéíóú])f([ #]?[βðɣʝʎmnz])', '%1v%2')

	-- word-internal coda
	ret = gsub(ret, '([aeiouáéíóú])p([^ #aeiouáéíóú][^ #])', '%1β%2')
	ret = gsub(ret, '([aeiouáéíóú])t([^ #aeiouáéíóú][^ #])', '%1ð%2')
	ret = gsub(ret, '([aeiouáéíóú])k([^ #aeiouáéíóú][^ #])', '%1ɣ%2')

	-- vowel: unaccented i/u next to another vowel become glides
	ret = gsub(ret, 'i([aeoáéóu])', 'j%1')
	ret = gsub(ret, 'u([aeoáéói])', 'w%1')
	ret = gsub(ret, '([aeoáéó])i', '%1j')
	ret = gsub(ret, '([aeoáéó])u', '%1w')

	-- plosive: β/ð/ɣ harden at the start of the input and after a nasal
	-- (ð also after l)
	ret = gsub(ret, '^β', 'b')
	ret = gsub(ret, '([mn][ #]?)β', '%1b')
	ret = gsub(ret, '^ð', 'd')
	ret = gsub(ret, '([lmn][ #]?)ð', '%1d')
	ret = gsub(ret, '^ɣ', 'g')
	ret = gsub(ret, '([mn][ #]?)ɣ', '%1g')

	-- palatal. As with "x" above, the boundary before an initial glide is
	-- captured and kept so the word separation survives.
	ret = gsub(ret, '^[jʝ]', 'ɟʝ')
	ret = gsub(ret, '([ #])j', '%1ʝ')
	ret = gsub(ret, '([lmn][ #]?)ʝ', '%1ɟʝ')
	ret = gsub(ret, 'ʝ([^aeiouáéíóújw])', 'j%1')
	ret = gsub(ret, 'ʝ$', 'j')

	-- nasal: place assimilation to the following consonant
	ret = gsub(ret, 'm$', 'n')
	ret = gsub(ret, 'n([ #]?[bpm])', 'm%1')
	ret = gsub(ret, '[mn]([ #]?f)', 'ɱ%1')
	ret = gsub(ret, 'm([ #]?[dlnrɾtθs])', 'n%1')
	ret = gsub(ret, '[mn]([ #]?[ɟɲʎ])', 'ɲ%1')
	ret = gsub(ret, '[mn]([ #]?[gkx])', 'ŋ%1')

	-- vibrant: tap becomes trill initially and after l/m/n/s
	ret = gsub(ret, '^ɾ', 'r')
	ret = gsub(ret, '([ #lmns])ɾ', '%1r')

	-- stress
	-- Accented vowel -> stress mark + plain vowel (built once, not per word).
	local accented = {
		["á"] = "ˈa", ["é"] = "ˈe", ["í"] = "ˈi", ["ó"] = "ˈo",
		["ú"] = "ˈu",
	}
	local words = {}
	for word in mw.ustring.gmatch(ret, '[^ ]+') do
		-- Step 1: put the mark directly before the stressed vowel.
		if find(word, '[áéíóú]') then -- written accent
			word = gsub(word, '[áéíóú]', accented)
		elseif find(word, '[aeiou].-[aeioumnɲŋs]$') then -- penultimate
			word = gsub(word, '([aeiou][^#aeiou]-[aeiou][^#aeiou]?[mnɲŋs]?)$', 'ˈ%1')
		else -- final
			word = gsub(word, '([aeiou][^#aeiou]-)$', 'ˈ%1')
		end
		-- Step 2: move the mark leftwards over the syllable onset.
		if find(word, '^[^aeiou]-ˈ') then -- word-initial
			word = gsub(word, '^(.-)ˈ', 'ˈ%1')
		elseif find(word, '[ptkbβdgɣf][jwlrɾ]-ˈ') then -- complex onset
			word = gsub(word, '([ptkbβdgɣf][jwlrɾ]-)ˈ', 'ˈ%1')
		elseif find(word, '[^#aeiou]ˈ') then -- simple onset
			word = gsub(word, '([^#aeiou][jw]?)ˈ', 'ˈ%1')
		end
		table.insert(words, word)
	end
	ret = table.concat(words, ' ')

	-- ASCII g -> IPA ɡ, then drop the '#' markers.
	ret = gsub(ret, 'g', 'ɡ')
	-- Assign before returning so only the string is returned, not gsub's
	-- second result (the replacement count).
	ret = gsub(ret, '#', '')
	return ret
end

return p