-- Documentation for this module may be created at Module:Sandbox/Stibba0/Names/doc

-- Extracts given and family names from full names, for GCI! :)
-- Lower-case separator words that identifyFormat compares against each word
-- of a name; a match classifies the name as "nl". Note that "de" is also
-- listed for Spanish and is disambiguated inside identifyFormat.
local dutchSeparation = {
	"van", "voor", "aan", "uit", "onder", "boven", "in",
	"de", "den", "der", "te", "ten", "ter"
}

-- Lower-case words whose presence in a name classifies it as "zh".
local chineseSeperation = {
	"li", "zhang", "chen", "zhao", "wu", "zhou",
	"wang", "ma", "pan", "gu", "mao"
}

-- Lower-case words whose presence in a name classifies it as "ar".
local arabicSeperation = { "el", "al", "ash" }

-- Lower-case separator words registered for "es".
local spanishSeperation = { "de" }

-- Letters identifyFormat searches for in the full name to decide that a name
-- containing "de" is Spanish rather than Dutch.
local spanishLetters = { "á", "é", "í", "ó", "ú", "ü", "ñ" }

-- Suffix markers consulted by divideWords when deciding whether the last word
-- of a name should be dropped.
local suffix = { "Sr", "Jr", "I", "V", "X" }

-- Pairs of { format code, separator list }. identifyFormat walks the entries
-- in this order; "en" has no separators, so it never matches a word.
local formats = {
	{ "nl", dutchSeparation },
	{ "zh", chineseSeperation },
	{ "en", {} },
	{ "ar", arabicSeperation },
	{ "es", spanishSeperation }
}

-- Reports whether `word` is a trailing name suffix such as "Jr", "Sr." or a
-- short Roman numeral ("II", "IV"). The whole word must match: a substring
-- test would also hit real names like "Ian", "Vu" or "Xi".
local isNameSuffix = function(word)
	-- Same length cap as before: only words shorter than four characters
	-- (e.g. "Jr.", "III") are ever treated as suffixes.
	if #word >= 4 then
		return false
	end
	-- Ignore one trailing period so "Jr." and "Jr" are treated alike.
	local bare = string.gsub(word, "%.$", "")
	for i = 1, #suffix do
		if bare == suffix[i] then
			return true
		end
	end
	-- Roman numerals built only from I, V and X.
	return string.match(bare, "^[IVX]+$") ~= nil
end

-- Splits `text` on whitespace and returns the words as a sequence, dropping
-- at most one trailing suffix (see isNameSuffix).
--   text: the full name; nil is treated like an empty string.
--   returns: a table of words, empty when `text` holds no non-space characters.
-- A name consisting of a single word is never stripped, so the result is only
-- empty for empty input.
local divideWords = function(text)
	local wordTable = {}
	for word in string.gmatch(text or "", "%S+") do
		wordTable[#wordTable + 1] = word
	end
	if #wordTable > 1 and isNameSuffix(wordTable[#wordTable]) then
		wordTable[#wordTable] = nil
	end
	return wordTable
end

-- Guesses the naming convention of a name from the separator words it contains.
--   wordList: sequence of the words of the name.
--   fullname: the unsplit name; when nil the words joined by spaces are used.
--   returns: { code, index } where `code` is a format code from `formats` and
--            `index` is the position in wordList of the word that matched, or
--            { "std", nil } when no separator word is present.
-- Formats, separators and words are scanned in that nesting order, so an
-- earlier entry of `formats` wins over a later one.
local identifyFormat = function(wordList, fullname)
	fullname = fullname or table.concat(wordList, " ")

	-- Lower-case every word once instead of on each comparison.
	local lowered = {}
	for x = 1, #wordList do
		lowered[x] = string.lower(wordList[x])
	end

	-- "de" is listed for both Dutch and Spanish. The name counts as Spanish
	-- when it contains one of spanishLetters, or when a word other than the
	-- particle itself ends in e, a, o or i. This is a coarse heuristic.
	local looksSpanish = function()
		for b = 1, #spanishLetters do
			if string.find(fullname, spanishLetters[b], 1, true) then
				return true
			end
		end
		for b = 1, #lowered do
			-- "de" ends in "e" itself, so it must not vote.
			if lowered[b] ~= "de" then
				-- string.sub is required here: indexing a string with a
				-- number (word[n]) always yields nil in Lua.
				local lastChar = string.sub(lowered[b], -1)
				if lastChar == "e" or lastChar == "a" or lastChar == "o" or lastChar == "i" then
					return true
				end
			end
		end
		return false
	end

	for i = 1, #formats do
		local code, separators = formats[i][1], formats[i][2]
		for z = 1, #separators do
			for x = 1, #lowered do
				if lowered[x] == separators[z] then
					if lowered[x] ~= "de" then
						return {code, x}
					end
					if looksSpanish() then
						return {"es", x}
					end
					return {"nl", x}
				end
			end
		end
	end
	return {"std", nil}
end

-- Splits wordList into given and family parts at position lastCount.
--   wordList:  sequence of the words of the name.
--   lastCount: index of the first word of the family name; when nil the last
--              word is taken as the family name.
--   returns: { first = ..., last = ... }. Every word is appended with a
--            preceding space, so a non-empty part starts with a space.
local nameSeparation = function(wordList, lastCount)
	lastCount = lastCount or #wordList
	local names = {
		["first"]="",
		["last"]=""
	}
	for i=1, #wordList do
		if i < lastCount then
			names["first"] = names["first"] .. " " .. wordList[i]
		else
			names["last"] = names["last"] .. " " .. wordList[i]
		end
	end
	return names
end

-- Builds the given/family split for a name according to its format.
--   wordList:       sequence of the words of the name.
--   nameFormat:     { code, index } as returned by identifyFormat.
--   providedFormat: format code forced by the caller; "" or nil means "use
--                   the detected code".
--   returns: { first = ..., last = ... } (see nameSeparation for spacing).
-- Rules per code:
--   "zh"        first word is the family name, the rest is the given name.
--   "nl", "ar"  family name starts at the separator word.
--   "es"        family name starts one word before the separator when the
--               separator is the third word or later, otherwise at it.
--   otherwise   ("std", "en", unknown codes) the last word is the family name.
-- When no separator position is known, the last word is the family name.
local formatName = function(wordList, nameFormat, providedFormat)
	-- Work on local copies so the caller's nameFormat table is left untouched.
	local code, index = nameFormat[1], nameFormat[2]
	if providedFormat ~= nil and providedFormat ~= "" then
		code = providedFormat
	end

	if code == "zh" then
		local names = {
			["first"]="",
			["last"]=wordList[1] or ""
		}
		for i=2, #wordList do
			names["first"] = names["first"] .. " " .. wordList[i]
		end
		return names
	end

	local splitAt = nil
	if code == "nl" or code == "ar" then
		splitAt = index
	elseif code == "es" then
		if index ~= nil and index > 2 then
			splitAt = index - 1
		else
			splitAt = index
		end
	end
	-- A forced format may come without a detected separator position.
	return nameSeparation(wordList, splitAt or #wordList)
end

-- Renders a names table as the HTML snippet shown to the reader.
--   names: table with string fields "first" (given name) and "last" (family name).
--   returns: both parts labelled in bold and joined by " -- ".
local returnNameString = function(names)
	local given = names["first"]
	local family = names["last"]
	local givenPart = "<b>Given</b> = " .. given
	local familyPart = " -- <b>Family</b> = " .. family
	return givenPart .. familyPart
end

local p = {}

-- Module entry point: splits a full name into given and family parts.
--   frame.args.name:   the full name to split.
--   frame.args.format: optional format code that overrides detection.
--   returns: the string produced by returnNameString.
-- A missing or blank name yields the rendering of two empty parts instead of
-- raising an error further down the pipeline.
p.getNames = function(frame)
	local fullname = frame.args.name
	if fullname == nil or not string.find(fullname, "%S") then
		return returnNameString({ ["first"] = "", ["last"] = "" })
	end
	local provFormat = frame.args.format or ""
	local wordList = divideWords(fullname)
	local nameFormat = identifyFormat(wordList, fullname)
	local names = formatName(wordList, nameFormat, provFormat)
	return returnNameString(names)
end

return p