-- Documentation for this module may be created at Module:Sandbox/dxa-kly/Dates/doc

-- Lua patterns whose presence in the (lower-cased) input marks the date as
-- uncertain. Entries are passed straight to string.match, so "%f[...]"
-- frontier patterns are used to keep "c." and "ca" from matching inside words.
local indef = {
  "about", "approaching", "approximate", "around",
  "%f[%a]c%.%f[%A]", "%f[%a]ca%f[%A]", "circa", "close to",
  "doubt", "dubious", "estimate",
  "in the area of", "in the neighborhood of", "in the neighbourhood of",
  "in the region of", "more or less", "near", "or so", "order of",
  "roughly", "something like", "speculative", "tentative",
  "uncertain", "unclear", "unreliable", "unsettled", "unsure",
}

-- Lower-case month abbreviations; the position in the list is the month number.
local months = {
  "jan", "feb", "mar", "apr", "may", "jun",
  "jul", "aug", "sep", "oct", "nov", "dec",
}

-- Display names, indexed by month number.
local fullmonths = {
  "January", "February", "March", "April", "May", "June",
  "July", "August", "September", "October", "November", "December",
}

-- Era suffixes, tried in this order ("bce" is listed before "bc").
local eras = {"ad", "bce", "bc", "ce"}

-- Sentinel returned by the parsers when the input looks like a date but is not valid.
local bad = "Invalid entry"

-- Module table; every public function is attached to it and it is returned at
-- the end of the file.
local p = {}

--- Check that a day number exists in the given month.
-- February uses the Gregorian leap-year rule: a year is a leap year when it is
-- divisible by 4, except century years, which must also be divisible by 400.
-- @param d day of the month (number)
-- @param m month number, 1-12; any other value has zero valid days
-- @param y year (number or numeric string); only consulted for February.
--   When it is missing or not numeric, February is allowed 29 days so that a
--   year-less 29 February is not rejected.
-- @return 1 when the day is valid, otherwise nothing
function p.day_try( d, m, y )
  local days = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
  local d_in_m = days[m] or 0
  if m == 2 then
    local year = tonumber(y)
    if year == nil or (year % 4 == 0 and (year % 100 ~= 0 or year % 400 == 0)) then
      d_in_m = 29
    end
  end
  if d > 0 and d <= d_in_m then return 1 end
end

--- Look for an era suffix ("ad", "bce", "bc", "ce") right after a date pattern.
-- For each era in turn, `ptn` is extended with a single space and the era as an
-- extra capture. The era must end at a word boundary, so that e.g.
-- "5 centimetres" is not read as "5 ce".
-- @param str lower-cased input text
-- @param ptn Lua pattern for the date part
-- @param idx position of the era capture in the extended pattern, i.e. the
--   number of captures in `ptn` plus one; only 2, 3 and 4 produce a result
-- @return for idx == 2: the first capture and the era;
--   for idx == 3 or 4: the era only; nothing when no era matches
function p.era_try( str, ptn, idx )
  for i = 1, #eras do
    local values = { str:match(ptn .. " (" .. eras[i] .. ")%f[%A]") }
    if values[idx] ~= nil then
      if idx == 2 then
        return values[1], values[2]
      elseif idx == 3 or idx == 4 then
        return values[idx]
      end
    end
  end
end

--- Report whether the text contains any of the uncertainty markers in `indef`.
-- @param str text to scan (the caller passes it lower-cased)
-- @return true when at least one marker pattern matches, false otherwise
function p.indef_try( str )
  for _, marker in ipairs(indef) do
    if str:find(marker) then
      return true
    end
  end
  return false
end

--- Convert a three-letter month abbreviation to its month number.
-- @param m abbreviation exactly as stored in `months` (e.g. "jan")
-- @return the 1-based position of `m` in `months`, or nothing when it is not listed
function p.mth_str_to_int( m )
  for number, abbreviation in ipairs(months) do
    if abbreviation == m then
      return number
    end
  end
end

--- Parse a slash-separated numeric date ("a/b/yyyy").
-- The first two numbers are read as day/month unless the second is greater
-- than 12, in which case they are read as month/day.
-- @param str input text
-- @return "iso", year, month, day (all numbers) for a valid date;
--   the "Invalid entry" sentinel when the numbers do not form a real date;
--   nothing when the text has no slash-separated date
function p.slash_try( str )
  local first, second, year = str:match("(%d+)/(%d+)/(%d+)")
  if not first then return end
  -- Convert every capture, the year included, so callers always get numbers.
  first, second, year = tonumber(first), tonumber(second), tonumber(year)
  local day, month = first, second
  if second > 12 then
    day, month = second, first
  end
  if not p.day_try(day, month, year) then return bad end
  return "iso", year, month, day
end

--- Parse a hyphen-separated numeric date in year-month-day order.
-- @param str input text
-- @return "iso", year, month, day (all numbers) for a valid date;
--   the "Invalid entry" sentinel when the numbers do not form a real date;
--   nothing when the text has no hyphen-separated date
function p.hyphenated_try( str )
  local year, month, day = str:match("(%d+)-(%d+)-(%d+)")
  if not year then return end
  year, month, day = tonumber(year), tonumber(month), tonumber(day)
  if not p.day_try(day, month, year) then return bad end
  return "iso", year, month, day
end

--- Parse a full date that spells out the month ("5 January 2000" or
-- "January 5, 2000"), optionally followed by an era.
-- Months are tried in calendar order; for each month the day-month-year layout
-- takes precedence over month-day-year.
-- @param str lower-cased input text
-- @return "dmy" or "mdy", then year, month, day (numbers) and, when present,
--   the era string; the "Invalid entry" sentinel when the day does not exist
--   in that month or when the text has a number/word/number shape without a
--   recognised month; nothing otherwise
function p.named_month_try( str )
  for _, pattern in ipairs( months ) do
    local dmy_ptn = "(%d+).-(" .. pattern .. ").-(%d+)"
    local mdy_ptn = "(" .. pattern .. ").-(%d+)[^%d]+(%d+)"

    local order, ptn, d, m, y
    d, m, y = str:match( dmy_ptn )
    if d then
      order, ptn = "dmy", dmy_ptn
    else
      m, d, y = str:match( mdy_ptn )
      if m then
        order, ptn = "mdy", mdy_ptn
      end
    end

    if order then
      d, m, y = tonumber(d), p.mth_str_to_int(m), tonumber(y)
      if not p.day_try(d, m, y) then return bad end
      -- The era is the fourth capture once it is appended to the date pattern.
      local era = p.era_try(str, ptn, 4)
      if era then
        return order, y, m, d, era
      end
      return order, y, m, d
    end
  end

  -- Shaped like a full date, but the word is not a known month.
  if str:find( "(%d+).-(%a+).-(%d+)" ) or str:find( "(%a+).-(%d+)[^%d]+(%d+)" ) then
    return bad
  end
end

--- Parse a year-less "day month" fragment such as "5 jan".
-- @param str lower-cased input text
-- @param ptn month abbreviation to look for (an entry of `months`)
-- @return "dm", day (number), full month name; nothing when there is no match
--   or the day does not exist in that month
function p.unclassified1_try( str, ptn )
  local day, month = str:match( "(%d+).-(" .. ptn .. ")" )
  if not day then return end
  day = tonumber(day)
  month = p.mth_str_to_int(month)
  -- No year is known, so validate against a leap year: 29 February must be accepted.
  local reference_leap_year = 2004
  if p.day_try(day, month, reference_leap_year) then
    return "dm", day, fullmonths[month]
  end
end

--- Parse a "month number" fragment, where the number is either a day or a year.
-- The number is treated as a year when an era follows it or when it is not a
-- valid day of that month; otherwise it is treated as a day.
-- @param str lower-cased input text
-- @param ptn month abbreviation to look for (an entry of `months`)
-- @return "my", full month name, year, and the era when present; or
--   "md", full month name, day; nothing when there is no match
function p.unclassified2_try( str, ptn )
  local month, number = str:match( "(" .. ptn .. ").-(%d+)" )
  number = tonumber(number)
  if not number then return end
  month = p.mth_str_to_int(month)

  -- The era is the third capture once it is appended to the pattern.
  local era = p.era_try(str, "(" .. ptn .. ").-(%d+)", 3)
  if era then
    return "my", fullmonths[month], number, era
  end
  -- No year is known, so validate against a leap year: 29 February must count as a day.
  local reference_leap_year = 2004
  if p.day_try(number, month, reference_leap_year) then
    return "md", fullmonths[month], number
  end
  return "my", fullmonths[month], number
end

--- Parse a bare year followed by an era, such as "44 bc".
-- @param str lower-cased input text
-- @return "ye", year (as the matched digit string), era; nothing when no
--   number-plus-era pair is found
function p.unclassified3_try( str )
  local year, era = p.era_try(str, "(%d+)", 2)
  if era then return "ye", year, era end
end

--- Last-resort parser: pick the largest number that appears in the text.
-- @param str input text
-- @return "num", largest number found; nothing when the text has no digits
function p.unclassified4_try( str )
  local largest
  for digits in str:gmatch("%d+") do
    local value = tonumber(digits)
    if largest == nil or value > largest then
      largest = value
    end
  end
  if largest ~= nil then return "num", largest end
end

--- Parse free-form date text by trying each parser in turn.
-- Order: slash dates, hyphenated dates, named-month dates, then for each month
-- the "day month" and "month number" fragments, then year-plus-era, and finally
-- the largest bare number. The first parser that returns anything wins.
-- @param inp text to parse; nil is treated as an empty string
-- @return the uncertainty flag (boolean from p.indef_try) followed by every
--   value the winning parser returned; or just the "Invalid entry" sentinel
--   when `inp` is not a string or no parser matched
function p.parsedate( inp )
  inp = inp or ""
  if type(inp) ~= "string" then return bad end
  inp = inp:lower()
  local has_indef = p.indef_try(inp)
  local unpack = table.unpack or unpack

  -- Run a parser once and keep all of its results (with their exact count) when
  -- the first one is truthy, so the parser does not need to be called again.
  local function attempt( parser, ... )
    local function pack( ... )
      return { n = select("#", ...), ... }
    end
    local results = pack(parser(...))
    if results[1] then return results end
  end

  local results = attempt(p.slash_try, inp)
    or attempt(p.hyphenated_try, inp)
    or attempt(p.named_month_try, inp)

  if not results then
    for _, pattern in ipairs( months ) do
      results = attempt(p.unclassified1_try, inp, pattern)
        or attempt(p.unclassified2_try, inp, pattern)
      if results then break end
    end
  end

  results = results
    or attempt(p.unclassified3_try, inp)
    or attempt(p.unclassified4_try, inp)

  if results then
    return has_indef, unpack(results, 1, results.n)
  end
  return bad
end

--- Entry point: parse `frame.args.text` and render it as display text.
-- Reads two arguments from `frame.args`:
--   text   - the date text to parse
--   format - optional output format: "iso", "dmy", "mdy" or "year". An empty
--            string is treated as "not given". A format can only be applied to
--            full dates (day, month and year all known).
-- @param frame object exposing the arguments as `frame.args`
-- @return the formatted date, prefixed with "circa " when the text contained
--   an uncertainty marker; "Invalid entry" when parsing failed;
--   "Cannot apply format" when a format was requested that cannot be produced
function p.unpackdate( frame )
  local s = { p.parsedate(frame.args.text) }
  local format = frame.args.format
  if format == "" then format = nil end

  for i = 1, #s do
    if s[i] == bad then return s[i] end
  end

  local uncertainty = s[1] and "circa " or ""
  local kind = s[2]

  if kind == "iso" or kind == "dmy" or kind == "mdy" then
    local year, month, day = s[3], s[4], s[5]
    -- The separating space is part of the suffix so that dates without an era
    -- do not end in a stray blank.
    local era = s[6] and (" " .. s[6]:upper()) or ""
    -- Without an explicit format the date is rendered in the layout it was written in.
    local wanted = format or kind
    if wanted == "iso" then
      return uncertainty .. string.format("%04d-%02d-%02d", year, month, day)
    elseif wanted == "dmy" then
      return uncertainty .. day .. " " .. fullmonths[month] .. " " .. year .. era
    elseif wanted == "mdy" then
      return uncertainty .. fullmonths[month] .. " " .. day .. ", " .. year .. era
    elseif wanted == "year" then
      return uncertainty .. year .. era
    end
    return "Cannot apply format"
  end

  -- Partial dates have a single fixed rendering.
  if format then return "Cannot apply format" end

  local text
  if kind == "dm" or kind == "md" then
    text = s[3] .. " " .. s[4]
  elseif kind == "ye" then
    text = s[3] .. " " .. s[4]:upper()
  elseif kind == "my" then
    text = s[3] .. " " .. s[4]
    if s[5] then text = text .. " " .. s[5]:upper() end
  else
    text = s[3]
  end
  return uncertainty .. text
end

return p