-- Documentation for this module may be created at Module:Sandbox/Erutuon/author citation/doc

local p = {}

-- Returns the position just past a non-empty match of `pattern` that begins
-- exactly at `pos` in `str`; returns `pos` unchanged when the pattern does not
-- match there (or only matches the empty string there).
local function advance_pos_if_starts_with(str, pattern, pos)
	local first, last = mw.ustring.find(str, pattern, pos)
	-- A match further along the string does not count, and an empty match
	-- (reported with last == first - 1) must not move the position.
	if first ~= pos or last < first then
		return pos
	end
	return last + 1
end

-- Tries each pattern in `prefixes` once, in order, against `str`. Every
-- pattern that matches at the current position moves the position past its
-- match before the next pattern is tried. Returns the final position.
local function advance_by_prefixes(str, prefixes, pos)
	local cursor = pos
	local index = 1
	local prefix = prefixes[index]
	while prefix ~= nil do
		cursor = advance_pos_if_starts_with(str, prefix, cursor)
		index = index + 1
		prefix = prefixes[index]
	end
	return cursor
end

-- [[d:Property:P428#P1793]]
-- ('t )?(d')?(de )?(la )?(van (der )?)?(Ma?c)?(De)?(Di)?\p{Lu}?C?['\p{Ll}]*([-'. ]*(van )?(y )?(d[ae][nr]?[- ])?(Ma?c)?[\p{Lu}bht]?C?['\p{Ll}]*)*\.? ?f?\.?
--
-- Finds where an author citation that begins at character position `i`
-- (default 1) of `str` ends, approximating the regular expression above with
-- a sequence of Lua patterns.
--
-- Returns the character index of the last character of the citation. When
-- nothing at `i` looks like part of a citation the result is `i - 1`, i.e. an
-- empty citation. Returns nil when `j` is given and the position just past
-- the citation is greater than `j`.
-- NOTE(review): because the comparison uses the position *after* the
-- citation, a citation whose last character is exactly at `j` is rejected;
-- confirm whether `j` is meant to be exclusive.
function p.find_end_of_author_citation(str, i, j)
	local start = i or 1

	-- Leading particles and the first name component.
	local pos = advance_by_prefixes(str, {
		"'t ", "d'", "de ", "la ", "van der ", "van ", "Ma?c", "De", "Di", "%u?C?['%l]*",
	}, start)

	-- Further name components; repeat until a whole pass consumes nothing.
	repeat
		local orig_pos = pos
		pos = advance_by_prefixes(str, {
			"Ma?c", "[%ubht]?C?", "['%l]*", "[-'. ]*", "d[ae][nr]?[- ]", "van ", "y "
		}, pos)
	until orig_pos == pos

	-- Optional trailing "f." (filius) marker.
	pos = advance_pos_if_starts_with(str, "%.? ?f?%.?", pos)

	-- The separator pattern above may swallow one trailing space, hyphen or
	-- apostrophe that belongs to whatever follows the citation; give it back.
	-- Only do so when something was actually consumed: otherwise the
	-- character inspected lies before `start` and trimming it would report an
	-- end position in front of the starting position.
	if pos > start then
		local last_char = mw.ustring.sub(str, pos - 1, pos - 1)
		if last_char == " " or last_char == "-" or last_char == "'" then
			pos = pos - 1
		end
	end

	if not j or pos <= j then
		return pos - 1
	end
end

-- Does not attempt to validate form of author citation. Returns successfully if
-- the citation is composed of valid author citations separated by
-- single commas, parentheses, or ampersands with optional whitespace around
-- them.
--
-- `citation` is split into separators and author abbreviations. `func` is
-- called with each non-empty author abbreviation; its return value replaces
-- the abbreviation in the output, or the abbreviation is kept when `func`
-- returns nil or false. Separators are copied through unchanged.
--
-- Returns the rebuilt string, or nil when the citation cannot be split this
-- way (two authors with no separator between them, or text that is neither
-- a separator nor an author abbreviation).
function p.transform_author_abbrevs(citation, func)
	local output = {}
	local remaining = citation
	while #remaining > 0 do
		-- `remaining` is re-sliced every iteration, so parsing always
		-- restarts at position 1.
		local pos = advance_pos_if_starts_with(remaining, "%s*[(),&]%s*", 1)
		if pos == 1 and #output > 0 then
			-- No separator after the previous author.
			return nil
		end
		local author_end = p.find_end_of_author_citation(remaining, pos)
		if not author_end then
			return nil
		end
		-- Never let the author end fall inside the separator that was just
		-- consumed; otherwise the separator's trailing whitespace would be
		-- emitted once here and again at the start of the next iteration.
		if author_end < pos - 1 then
			author_end = pos - 1
		end
		local author = mw.ustring.sub(remaining, pos, author_end)
		output[#output + 1] = mw.ustring.sub(remaining, 1, pos - 1)
		-- An empty author occurs after a trailing separator such as the
		-- closing parenthesis in "(L.)"; there is nothing to transform.
		if author ~= "" then
			author = func(author) or author
		end
		output[#output + 1] = author
		remaining = mw.ustring.sub(remaining, author_end + 1)
	end

	return table.concat(output)
end

-- Maps standard botanical author abbreviations to the title of the page to
-- link for that author.
-- Currently using IPNI convention of no spaces after initials.
local author_abbrevs = {
	["L."] = "Carl Linnaeus",
	-- The standard form is "Schltdl." (previously misspelled "Schldl.").
	["Schltdl."] = "Diederich Franz Leonhard von Schlechtendal",
	["Cham."] = "Adelbert von Chamisso",
	["B.Boivin"] = "Joseph Robert Bernard Boivin",
	["A.J.Eames"] = "Arthur Johnson Eames",
}

-- Removes the whitespace that follows each full stop in `abbrev`
-- (e.g. "A. J. Eames" becomes "A.J.Eames") and returns the result.
function p.standardize_abbrev(abbrev)
	-- Assigning to a single local discards gsub's second result (the
	-- replacement count).
	local without_spaces = abbrev:gsub("%.%s+", ".")
	return without_spaces
end

-- Adapted from getBotanicalAuthorityFromWikiData in [[Module:Taxon authority]].
--
-- Looks up the botanist author abbreviation (Wikidata property P428) of the
-- Wikidata item connected to the page titled `name`. When the title itself
-- has no item, the target of a redirect at that title is tried instead.
--
-- Returns the value of the first best-ranked P428 statement, or nil when
-- `name` is not a non-empty string, is not a valid title, has no connected
-- item, or the item has no P428 value.
function p.get_botanist_author_abbreviation(name)
	if type(name) ~= "string" or name == "" then
		return nil
	end

	local wikidata_id = mw.wikibase.getEntityIdForTitle(name)

	if not (wikidata_id and mw.wikibase.isValidEntityId(wikidata_id)) then
		-- mw.title.new returns nil for text that is not a valid title, and
		-- redirectTarget is false when the page is not a redirect.
		local title_obj = mw.title.new(name)
		local target = title_obj and title_obj.redirectTarget
		if target and target.prefixedText then
			-- Look up the redirect target, not the original name again.
			wikidata_id = mw.wikibase.getEntityIdForTitle(target.prefixedText)
		end
	end

	if not (wikidata_id and mw.wikibase.isValidEntityId(wikidata_id)) then
		return nil
	end

	-- Fetch only the P428 statements rather than loading the whole entity.
	local statements = mw.wikibase.getBestStatements(wikidata_id, 'P428') -- botanist author abbreviation
	local statement = statements and statements[1]
	-- "no value" / "unknown value" snaks carry no datavalue.
	local datavalue = statement and statement.mainsnak and statement.mainsnak.datavalue
	if datavalue then
		return datavalue.value
	end
	return nil
end

-- Wraps each recognised author abbreviation in `citation` in a wikilink.
--
-- Abbreviations listed in `author_abbrevs` link to the mapped page with the
-- abbreviation as link text. Otherwise, if the page titled with the
-- abbreviation resolves to a Wikidata item with a botanist author
-- abbreviation, the abbreviation itself is linked. Anything else is left as
-- plain text.
--
-- Returns the linked citation, or nil when p.transform_author_abbrevs cannot
-- parse `citation`.
function p.link_author_citation(citation)
	local function link_author(author)
		-- The parser can hand over an empty author (e.g. after the closing
		-- parenthesis of "(L.)"); an empty string is not a valid page title,
		-- so skip the lookups entirely.
		if author == "" then
			return nil
		end

		local full_name = author_abbrevs[p.standardize_abbrev(author)]
		if full_name then
			return "[[" .. full_name .. "|" .. author .. "]]"
		end

		if p.get_botanist_author_abbreviation(author) then
			return "[[" .. author .. "]]"
		end

		return nil
	end

	return p.transform_author_abbrevs(citation, link_author)
end

return p