-- Documentation for this module may be created at Module:Sandbox/N3rsti/Dates2/doc

-- Module table: the public function format_date is attached to it further
-- down and the table is returned at the end of the file.
local p = {}

-- Vocabulary shared by the date-parsing functions in this file.
local date_words = {
	-- Lowercase full month names; position in the list is the month number.
	months={"january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"},
	-- Lowercase three-letter abbreviations, in the same order as `months`.
	short_months={"jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"},
	-- Era designations; compared against whole words with exact (upper) case.
	accepted_endings = {"AD", "BCE", "BC", "CE"},
	-- Day count for each month, in the same order as `months` (February listed as 28).
	months_days = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
	-- Ordinal suffixes that may follow a day number, e.g. "21st".
	number_endings = {"st", "nd", "rd", "th"},
	-- Words whose presence in the text makes detect_date report circa_ending = "circa".
	circa_words = {'circa', 'sometime', 'around', 'uncertain'},
	-- Words whose presence in the text is enough for check_valid to accept it.
	accepted_words = {'year', 'month', 'day'}
}

-- Search for keywords
--
-- Decides whether `date_text` contains at least one word this module knows
-- how to interpret as part of a date.
--
-- @param date_text  string to inspect
-- @param date_table optional list of words that should be considered in
--                   addition to the words found in `date_text`; it is only
--                   read, never modified
-- @return `{is_valid=true}` when a month name/abbreviation (any case) or an
--         era designation (exact case) is one of the words, or when a circa
--         word or one of the accepted words occurs anywhere in the text;
--         nil otherwise
function check_valid(date_text, date_table)
	-- Words are split on "-" or "/" when the text contains one, else on whitespace.
	local date_sign = "[^%s]+"
	if string.find(date_text, "-", 1, true) then
		date_sign = "[^%-]+"
	elseif string.find(date_text, "/", 1, true) then
		date_sign = "[^%/]+"
	end

	-- Collect candidates in a private list: appending to the caller's table
	-- made it grow with duplicates on every call and crashed on a nil table.
	local words = {}
	for _, word in ipairs(date_table or {}) do
		words[#words + 1] = word
	end
	for word in string.gmatch(date_text, date_sign) do
		words[#words + 1] = word
	end

	for _, word in ipairs(words) do
		local lowered = string.lower(word)
		for index, month in ipairs(date_words.months) do
			if lowered == month or lowered == date_words.short_months[index] then
				return {is_valid=true}
			end
		end
		for _, ending in ipairs(date_words.accepted_endings) do
			if word == ending then
				return {is_valid=true}
			end
		end
	end

	-- These keywords are looked up as plain substrings of the whole text.
	for _, keyword in ipairs(date_words.circa_words) do
		if string.find(date_text, keyword, 1, true) then
			return {is_valid=true}
		end
	end
	for _, keyword in ipairs(date_words.accepted_words) do
		if string.find(date_text, keyword, 1, true) then
			return {is_valid=true}
		end
	end

	return nil
end

-- Returns the lowercase full name of the first month (in calendar order)
-- whose full or abbreviated name occurs anywhere in the text, ignoring case;
-- nil when no month is mentioned.
local function find_month_name(date_text)
	local lowered = string.lower(date_text)
	for index, name in ipairs(date_words.months) do
		if string.find(lowered, name, 1, true)
			or string.find(lowered, date_words.short_months[index], 1, true) then
			return name
		end
	end
	return nil
end

-- Define format from submitted date
--
-- Splits `date_text` into its components.
--
-- @param date_text string such as "2019-12-21", "21/12/2019",
--                  "21st December 2019", "December 21, 2019" or "circa 500 BC"
-- @return a table with whichever of these fields could be determined, or nil
--         when the text is not recognised:
--           day, month, year  captured substrings (month may be a name or a number)
--           date_ending       era designation found among the words, else ""
--           circa_ending      "circa" when a circa word is present, else ""
--         A text consisting of a single number yields only `{day=date_text}`.
function detect_date(date_text)
	-- Words are split on "-" or "/" when the text contains one, else on whitespace.
	local date_sign = "[^%s]+"
	if string.find(date_text, "-", 1, true) then
		date_sign = "[^%-]+"
	elseif string.find(date_text, "/", 1, true) then
		date_sign = "[^%/]+"
	end
	local date_table = {}
	for word in string.gmatch(date_text, date_sign) do
		date_table[#date_table + 1] = word
	end

	-- Era designation: when several are present, the one listed last in
	-- date_words.accepted_endings wins.
	local date_ending = ""
	for _, ending in ipairs(date_words.accepted_endings) do
		for _, word in ipairs(date_table) do
			if word == ending then
				date_ending = ending
			end
		end
	end

	-- Ordinal suffix: only words of the form <digits><suffix> count, so that
	-- ordinary words such as "August" or "around" are not mistaken for one.
	-- Kept local so the value cannot leak into later calls.
	local num_ending = ""
	for _, num_end in ipairs(date_words.number_endings) do
		for _, word in ipairs(date_table) do
			if string.match(word, "^%d+" .. num_end .. "$") then
				num_ending = num_end
			end
		end
	end

	local circa_ending = ""
	for _, keyword in ipairs(date_words.circa_words) do
		if string.find(date_text, keyword, 1, true) then
			circa_ending = "circa"
		end
	end

	-- A lone number is reported as a day.
	if #date_table == 1 and tonumber(date_text) then
		return {day=date_text}
	end

	if string.find(date_text, "-", 1, true) then
		-- year-month-day
		local year, month, day = string.match(date_text, "(%d+)%-(%d+)%-(%d+)")
		if year then
			return {day=day, month=month, year=year, date_ending=date_ending, circa_ending=circa_ending}
		end
	elseif string.find(date_text, "/", 1, true) then
		-- day/month/year
		local day, month, year = string.match(date_text, "(%d+)/(%d+)/(%d+)")
		if day then
			return {day=day, month=month, year=year, date_ending=date_ending, circa_ending=circa_ending}
		end
	end

	-- Exactly two numeric words: only the second one is reported, as the year.
	if #date_table == 2 and tonumber(date_table[1]) and tonumber(date_table[2]) then
		local _, second = string.match(date_text, "(%d+) (%d+)")
		if second then
			return {year=second, date_ending=date_ending, circa_ending=circa_ending}
		end
	end

	if check_valid(date_text, date_table) then
		-- "21st December 2019", then "21 December 2019", then "December 21, 2019"
		local day, month, year = string.match(date_text, "(%d+)" .. num_ending .. " (%w+) (%d+)")
		if not day then
			day, month, year = string.match(date_text, "(%d+) (%w+) (%d+)")
		end
		if not day then
			month, day, year = string.match(date_text, "(%w+) (%d+), (%d+)")
		end
		if day then
			return {day=day, month=month, year=year, date_ending=date_ending, circa_ending=circa_ending}
		end

		-- Looser shapes: first number in the text plus the last number that
		-- follows a space.
		day = string.match(date_text, "(%d+)" .. num_ending)
		local _, trailing_year = string.match(date_text, "(.+) (%d+)")
		if day and trailing_year then
			local month_name = find_month_name(date_text)
			if month_name then
				-- Report the month that is actually written in the text. A
				-- "Month Year" text still yields the number as both day and
				-- year: p.format_date compares the day against the month
				-- length whenever a month name is present.
				return {day=day, month=month_name, year=trailing_year, date_ending=date_ending, circa_ending=circa_ending}
			end
			-- No month is mentioned, so none is invented.
			local number_count = select(2, string.gsub(date_text, "%d+", ""))
			if number_count == 1 then
				-- A single number next to a keyword ("circa 2019",
				-- "around 500 BC") is read as a year.
				return {year=trailing_year, date_ending=date_ending, circa_ending=circa_ending}
			end
			return {day=day, year=trailing_year, date_ending=date_ending, circa_ending=circa_ending}
		end

		-- "21 December", "2019 BC": a number followed by one word.
		day, month = string.match(date_text, "(%d+) (%w+)")
		if day then
			return {day=day, month=month, date_ending=date_ending, circa_ending=circa_ending}
		end

		-- A number followed by anything else is reported as a year.
		year = string.match(date_text, "(%d+) (.+)")
		if year then
			return {year=year, circa_ending=circa_ending, date_ending=date_ending}
		end
	end

	-- No keyword present: two adjacent numbers are read as day and month.
	local day, month = string.match(date_text, "(%d+) (%d+)")
	if day then
		return {day=day, month=month, date_ending=date_ending, circa_ending=circa_ending}
	end

	return nil
end

-- Gregorian leap-year rule; an unknown (nil) year is treated as non-leap.
local function is_leap_year(year_number)
	if not year_number then
		return false
	end
	return (year_number % 4 == 0 and year_number % 100 ~= 0) or year_number % 400 == 0
end

-- Converts a numeric month ("3", "03") to its capitalised English name;
-- returns "" when the number does not correspond to a month.
local function month_name_from_number(month)
	local name = date_words.months[tonumber(month)] or ""
	return (name:gsub("^%l", string.upper))
end

-- Format date to expected format
--
-- Reads two arguments from `frame.args`:
--   date    the date text to parse (defaults to "")
--   format  "iso", "mdy", "year", or anything else for the default
--           "day Month year" layout; when empty and the text contains "/" or
--           "-", "iso" is assumed
--
-- @return the formatted date as a string (the pieces are joined with single
--         spaces, so absent pieces leave extra spaces), or "Invalid entry"
--         when nothing could be detected or the day does not exist in the
--         named month
function p.format_date(frame)
	local date_text = frame.args.date or ""
	local date_format = frame.args.format or ""
	if date_format == ""
		and (string.find(date_text, "/", 1, true) or string.find(date_text, "-", 1, true)) then
		date_format = "iso"
	end

	-- Parse once and reuse the result.
	local detected = detect_date(date_text) or {}
	local day = detected.day or ""
	local month = detected.month or ""
	local year = detected.year or ""
	local date_ending = detected.date_ending or ""
	local circa_ending = detected.circa_ending or ""

	-- A month name written in the text takes precedence over the detected
	-- month, and the day is validated against that month's length.
	local lowered_text = string.lower(date_text)
	for month_num = 1, #date_words.months do
		if string.find(lowered_text, date_words.months[month_num], 1, true)
			or string.find(lowered_text, date_words.short_months[month_num], 1, true) then
			month = date_words.months[month_num]
			local day_number = tonumber(day)
			-- Skip the check when there is no numeric day to compare.
			if day_number then
				-- Work on a local copy of the limit so the shared table is
				-- never modified; only February gains a day in leap years.
				local last_day = date_words.months_days[month_num]
				if month_num == 2 and is_leap_year(tonumber(year)) then
					last_day = last_day + 1
				end
				if day_number < 1 or day_number > last_day then
					return "Invalid entry"
				end
			end
			break
		end
	end

	-- Nothing usable was found, whatever the requested format.
	if day == "" and month == "" and year == "" then
		return "Invalid entry"
	end

	-- Month name -> month number.
	local month_numbers = {}
	for number, name in ipairs(date_words.months) do
		month_numbers[name] = number
	end

	month = (month:gsub("^%l", string.upper))

	if date_format == "iso" then
		if not tonumber(month) then
			-- An unrecognised month word becomes "" rather than the text "nil".
			local month_number = month_numbers[string.lower(month)]
			month = month_number and tostring(month_number) or ""
		end
		-- Zero-pad single digits regardless of how the month was written.
		if #month == 1 then
			month = "0" .. month
		end
		if #day == 1 then
			day = "0" .. day
		end
		return circa_ending .. " " .. year .. "-" .. month .. "-" .. day .. " " .. date_ending
	elseif date_format == "mdy" then
		if tonumber(month) then
			month = month_name_from_number(month)
		end
		return circa_ending .. " " .. month .. " " ..  day .. ", " .. year .. " " .. date_ending
	elseif date_format == "year" then
		return circa_ending .. " " .. year .. " " .. date_ending
	end

	if tonumber(month) then
		month = month_name_from_number(month)
	end
	-- detect_date can report the era designation in the month or year slot
	-- (e.g. "2019 BC"); do not print it twice.
	if month == date_ending then
		month = ""
	end
	if year == date_ending then
		year = ""
	end
	return circa_ending .. " " .. day .. " " .. month  .. " " .. year .. " "  .. date_ending
end

-- Hand the module table (holding format_date) back to whoever loads this file.
return p