Module:Sandbox/N3rsti/Names

Revision as of 10:55, 11 January 2020 by imported>N3rsti
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:Sandbox/N3rsti/Names/doc

-- Module export table; the public entry point is attached to it below.
local p = {}

-- Heuristic word lists used by getnames to guess the language of a name.
local lang = {
	-- Whole words that mark a name as Chinese.
	chinese_names = {
		"Wang", "Li", "Zhang", "Liu", "Chen", "Yang", "Huang",
		"Zhao", "Wu", "Zhou", "Wong", "Zedong", "Zu", "Dong",
	},
	-- Word endings that mark a name as Chinese.
	chinese_endings = { "ang", "zhi" },

	-- Accented letters whose presence anywhere in the name marks it as Spanish.
	spain_letters = { "í", "á", "é", "ó" },
	-- Names whose presence marks the full name as Spanish; also used to
	-- recognise a second given name.
	spanish_names = {
		"Diego", "Esteban", "Luis", "Fernán", "Rodríguez", "Sofia", "Santiago",
	},

	-- Word endings that mark a name as Dutch.
	dutch_endings = { "er", "gh", "ch" },

	-- Word endings that mark a name as Polish.
	polish_endings = { "ski", "cki", "ska", "cka" },

	-- Word endings that mark a name as English.
	english_endings = { "ght", "Jr.", "Jr", "Sr.", "Sr", "I", "V", "X" },
}

--[[
Fallback splitter, used for names that getnames could not attribute to a
specific language.

The last word is returned as the family name and all preceding words as the
given name(s). Names of more than three words are handed to one_name_pattern
(first word given, everything else family). A single-word name is returned as
a given name with an empty family name.

@param fullname string: full name, words separated by whitespace
@return string: "''Given'' = <given> -- ''Family'' = <family>"
--]]
function basic_pattern(fullname)
	-- Split the argument itself so the result does not depend on a global
	-- word list having been prepared by the caller.
	local words = {}
	for word in string.gmatch(fullname, "%S+") do
		words[#words + 1] = word
	end

	if #words > 3 then
		return one_name_pattern(fullname)
	end

	-- The parts are rebuilt by joining words. Stripping the given name with
	-- gsub would treat it as a Lua pattern ("Jean-Paul" contains the magic
	-- character "-") and would also remove every repeated occurrence.
	local given, family
	if #words < 2 then
		given, family = words[1] or "", ""
	else
		given = table.concat(words, " ", 1, #words - 1)
		family = words[#words]
	end
	return "''Given'' = " .. given .. " -- " .. "''Family'' = " .. family
end

--[[
Splits a name that has exactly one given name: the first word is the given
name and every following word belongs to the family name.

@param fullname string: full name, words separated by whitespace
@return string: "''Given'' = <given> -- ''Family'' = <family>"
        (family is empty when the name consists of a single word)
--]]
function one_name_pattern(fullname)
	-- Split the argument itself so the result does not depend on a global
	-- word list having been prepared by the caller.
	local words = {}
	for word in string.gmatch(fullname, "%S+") do
		words[#words + 1] = word
	end

	local given = words[1] or ""
	-- Join the remaining words instead of gsub-stripping the given name:
	-- gsub interprets the name as a pattern and removes every occurrence,
	-- so "Li Li Wang" used to lose both "Li" words.
	local family = table.concat(words, " ", 2)
	return "''Given'' = " .. given .. " -- " .. "''Family'' = " .. family
end

--[[
Splits a name that has two given names: the first two words are the given
names and every following word belongs to the family name.

When the name has fewer than three words there is no room for two given names
plus a family name; the first word is then the given name and the second word
(if any) the family name.

@param fullname string: full name, words separated by whitespace
@return string: "''Given'' = <given> -- ''Family'' = <family>"
--]]
function two_names_pattern(fullname)
	-- Split the argument itself so the result does not depend on a global
	-- word list having been prepared by the caller.
	local words = {}
	for word in string.gmatch(fullname, "%S+") do
		words[#words + 1] = word
	end

	-- Number of leading words that form the given name.
	local given_count = 2
	if #words < 3 then
		given_count = 1
	end

	-- Joining words avoids using the given names as a gsub pattern, which
	-- broke on magic characters such as "-" in "Anna-Maria".
	local given = table.concat(words, " ", 1, math.min(given_count, #words))
	local family = table.concat(words, " ", given_count + 1)
	return "''Given'' = " .. given .. " -- " .. "''Family'' = " .. family
end

-- Builds the string returned to the wiki page from the two name parts.
local function format_split(given, family)
	return "''Given'' = " .. given .. " -- " .. "''Family'' = " .. family
end

-- True when word ends with at least one of the listed suffixes.
local function ends_with_any(word, suffixes)
	for _, suffix in ipairs(suffixes) do
		if string.sub(word, -#suffix) == suffix then
			return true
		end
	end
	return false
end

-- True when text contains at least one of the needles as a literal substring.
local function contains_any(text, needles)
	for _, needle in ipairs(needles) do
		if string.find(text, needle, 1, true) then
			return true
		end
	end
	return false
end

-- True when value is equal to one of the entries of list.
local function is_one_of(value, list)
	for _, entry in ipairs(list) do
		if value == entry then
			return true
		end
	end
	return false
end

--[[
Guesses how a full name splits into given and family name.

The language of the name is guessed from word endings and from the word lists
in `lang`; the split is then chosen per language. Names that match no
heuristic fall back to basic_pattern.

@param frame table: invocation frame; the full name is read from frame.args.name
@return string: "''Given'' = <given> -- ''Family'' = <family>", or
        "<b>No parameter supplied</b>" when the name is missing or blank
--]]
function p.getnames(frame)
	local fullname = frame.args.name or ""

	-- name_table holds every whitespace-separated word of fullname. It stays
	-- a global because helper functions in this module may read it.
	name_table = {}
	for word in string.gmatch(fullname, "%S+") do
		name_table[#name_table + 1] = word
	end
	local words = name_table
	local count = #words
	-- Covers both a missing parameter and one that contains only whitespace.
	if count == 0 then
		return "<b>No parameter supplied</b>"
	end

	-- Joins words[first..last] (last defaults to the final word) with spaces.
	local function join(first, last)
		return table.concat(words, " ", first, last or count)
	end

	-- Local on purpose: as a global, a language detected for one name would
	-- still be set when the next name is processed in the same Lua state.
	local name_format

	-- Check every word for endings typical of a language. Later words and
	-- later checks override earlier ones, so ipairs is used to keep the word
	-- order well defined.
	for _, word in ipairs(words) do
		if string.sub(word, -2) == "ez" then
			name_format = "es"
		end
		if ends_with_any(word, lang.dutch_endings) then
			name_format = "nl"
		end
		if ends_with_any(word, lang.polish_endings) then
			name_format = "pl"
		end
		if ends_with_any(word, lang.english_endings) then
			name_format = "en"
		end

		-- Chinese names are written family name first, so they are handled
		-- immediately: the last word is the given name, the words before it
		-- the family name.
		if is_one_of(word, lang.chinese_names) or ends_with_any(word, lang.chinese_endings) then
			-- Drop a parenthesised courtesy name, e.g. "Zhuge Liang (Kongming)".
			local without_courtesy = string.gsub(fullname, "%b()", " ")
			local parts = {}
			for part in string.gmatch(without_courtesy, "%S+") do
				parts[#parts + 1] = part
			end
			if #parts < 2 then
				return format_split("", parts[1] or "")
			end
			return format_split(parts[#parts], table.concat(parts, " ", 1, #parts - 1))
		end
	end

	-- Accented letters or well-known Spanish names anywhere in the full name
	-- mark it as Spanish, overriding the ending-based guess.
	if contains_any(fullname, lang.spain_letters) or contains_any(fullname, lang.spanish_names) then
		name_format = "es"
	end

	if name_format == "es" then
		-- Look for "de" as a whole word. A substring test would also hit
		-- names such as "Hernández", which contain no "de" word at all.
		local de_index
		for i, word in ipairs(words) do
			if word == "de" then
				de_index = i -- the last occurrence wins
			end
		end
		if de_index then
			-- The given name runs through the word after "de"; at least the
			-- last word is always kept for the family name.
			local given_last = math.min(de_index + 1, count - 1)
			return format_split(join(1, given_last), join(given_last + 1))
		end
		-- A second given name needs a third word to serve as family name.
		if count > 2 and is_one_of(words[2], lang.spanish_names) then
			return two_names_pattern(fullname)
		end
		return one_name_pattern(fullname)
	elseif name_format == "pl" then
		-- Most Polish surnames are a single (possibly hyphenated) word, so a
		-- name of more than two words most likely has two given names.
		if count > 2 then
			return two_names_pattern(fullname)
		end
		return basic_pattern(fullname)
	elseif name_format == "en" then
		-- Reaching this branch already means a word carries an English
		-- ending or suffix, so everything after the first word is treated as
		-- the family name.
		return one_name_pattern(fullname)
	end

	-- A "van" or "de" particle starts the family name; everything before it
	-- is the given name.
	for i, word in ipairs(words) do
		if word == "van" or word == "de" then
			return format_split(join(1, i - 1), join(i))
		end
	end

	-- Nothing matched: the last word is the family name.
	return basic_pattern(fullname)

end

return p