Open main menu
Home
Random
Donate
Recent changes
Special pages
Community portal
Preferences
About Stockhub
Disclaimers
Search
User menu
Talk
Contributions
Create account
Log in
Editing
Module:Language/data/sandbox
Warning:
You are not logged in. Your IP address will be publicly visible if you make any edits. If you
log in
or
create an account
, your edits will be attributed to your username, along with other benefits.
Anti-spam check. Do
not
fill this in!
local U = mw.ustring.char -- Diacritics, from the [[Combining Diacritical Marks]] block. local grave = U(0x300) local acute = U(0x301) local circumflex = U(0x302) local tilde = U(0x303) local macron = U(0x304) local breve = U(0x306) local dot = U(0x307) local diaeresis = U(0x308) local double_acute = U(0x30B) local caron = U(0x30C) local double_grave = U(0x30F) local invbreve = U(0x311) local dot_below = U(0x323) local undertie = U(0x35C) --[[ This is a table of Wiktionary language codes with data belonging to them. Name is the "canonical name" used on Wiktionary. ]] local data = { ["languages"] = { ["ab"] = { ["name"] = "Abkhaz", }, ["ang"] = { -- Remove macrons, acutes, and overdots ["replacements"] = { decompose = true, from = { "[" .. macron .. acute .. dot .. "]" }, }, }, ["ar"] = { ["direction"] = "rtl", -- Should be in the script data module. ["replacements"] = { -- ālif with wasla is replaced by ālif; [U(0x0671)] = U(0x0627), -- taṭwīl, fatḥatan, ḍammatan, kasratan, -- fatḥa, ḍamma, kasra, -- shadda, sukūn, and superscript (dagger) ālif are removed. ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D) ..U(0x064E)..U(0x064F)..U(0x0650) ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "", }, }, ["bal"] = { ["name"] = "Baluchi", }, ["be"] = { ["replacements"] = { [acute] = "", }, }, ["bua"] = { ["name"] = "Buryat", }, ["cu"] = { ["name"] = "Old Church Slavonic", }, ["egy"] = { ["name"] = "Egyptian", }, ["frp"] = { ["name"] = "Franco-Provençal", }, ["goh"] = { ["replacements"] = { decompose = true, from = { "[" .. macron .. circumflex .. diaeresis .. "]", }, }, }, ["got"] = { ["replacements"] = { -- Latin to Gothic since people will not want to have to copy -- and paste Gothic letters in ["[AÁaáĀā]"] = "𐌰", ["[Bb]"] = "𐌱", ["[Gg]"] = "𐌲", ["[Dd]"] = "𐌳", ["[EeĒē]"] = "𐌴", ["[Qq]"] = "𐌵", ["[Zz]"] = "𐌶", ["[Hh]"] = "𐌷", ["[Þþ]"] = "𐌸", ["[IiÍí]"] = "𐌹", ["[Kk]"] = "𐌺", ["[Ll]"] = "𐌻", ["[Mm]"] = "𐌼", ["[Nn]"] = "𐌽", ["[Jj]"] = "𐌾", ["[UuÚúŪū]"] = "𐌿", ["[Pp]"] = "𐍀", ["[Rr]"] = "𐍂", ["[Ss]"] = "𐍃", ["[Tt]"] = "𐍄", ["[WwYy]"] = "𐍅", ["[Ff]"] = "𐍆", ["[Xx]"] = "𐍇", ["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted ["[OoŌō]"] = "𐍉", }, }, ["grc"] = { ["replacements"] = { decompose = true, from = { -- Replace variant letterforms with standard ones. "ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ", -- Remove macrons and breves. "[" .. macron .. breve .. undertie .. "]" }, to = { "β", "ε", "θ", "κ", "ρ", "σ", "φ", } }, }, ["ha"] = { -- remove tilde, grave, acute, macron, circumflex ["replacements"] = { decompose = true, from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" }, }, }, ["jbo"] = { ["type"] = "appendix", }, ["la"] = { ["replacements"] = { decompose = true, from = { "[" .. macron .. breve .. diaeresis .. "]" }, }, }, ["lt"] = { -- remove acute, tilde, grave ["replacements"] = { decompose = true, from = { "[" .. acute .. tilde .. grave .. "]" }, }, }, ["moe"] = { ["name"] = "Cree", }, ["mul"] = { ["name"] = "Translingual", }, ["nci"] = { -- Remove macrons, acutes, circumflexes and graves ["replacements"] = { decompose = true, -- Remove macrons, acutes, circumflexes, graves, and saltillo; -- see [[Saltillo (linguistics)]]. from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" }, }, }, ["nds-de"] = { ["name"] = "German Low German", }, ["orv"] = { ["replacements"] = { [U(0x484)] = "", }, }, ["ru"] = { ["replacements"] = { [acute] = "", }, }, ["rw"] = { ["name"] = "Rwanda-Rundi", }, ["se"] = { ["replacements"] = { ["([đflmnŋrsšŧv])'%1"] = "%1%1", }, }, ["sh"] = { ["replacements"] = { decompose = true, from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave .. grave .. invbreve .. acute .. macron .. tilde .. "]" }, to = { "%1" }, }, }, ["sl"] = { ["replacements"] = { decompose = true, -- remove tonal orthography from = {"ł", "[" .. grave .. acute .. macron .. double_grave .. invbreve .. circumflex .. dot_below .. "]"}, to = {"l"}, }, }, ["uk"] = { ["replacements"] = { [acute] = "", } }, ["xcl"] = { ["replacements"] = { ["[՞՜՛՟]"] = "", ["և"] = "եւ", }, }, ["xgf"] = { ["replacements"] = { ["['`ʔ]"] = "ʼ", }, }, -- Custom private-use codes which should be added to [[Module:Lang]]. -- Codes are in the format of "code-x-code" ["gem-x-proto"] = { ["type"] = "reconstructed", }, ["grk-x-proto"] = { ["name"] = "Proto-Hellenic", ["type"] = "reconstructed", }, ["ine-x-proto"] = { ["type"] = "reconstructed", }, ["sem-x-proto"] = { ["type"] = "reconstructed", }, ["sla-x-proto"] = { ["type"] = "reconstructed", ["replacements"] = { ["[ÀÁÃĀȀȂ]"] = "A", ["[àáãāȁȃ]"] = "a", ["[ÈÉẼĒȄȆ]"] = "E", ["[èéẽēȅȇ]"] = "e", ["[ÌÍĨĪȈȊ]"] = "I", ["[ìíĩīȉȋ]"] = "i", ["[ÒÓÕŌȌȎŐ]"] = "O", ["[òóõōȍȏő]"] = "o", ["[ÙÚŨŪȔȖŰ]"] = "U", ["[ùúũūȕȗű]"] = "u", ["[ỲÝỸȲ]"] = "Y", ["[ỳýỹȳ]"] = "y", ["Ǭ"] = "Ǫ", ["ǭ"] = "ǫ", ["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "", ["ĭ"] = "ь", ["ŭ"] = "ъ", }, }, }, -- Here, keys (for example, "gem") are Wikipedia language codes used in -- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary -- code. -- Subtags are not currently supported. ["redirects"] = { ["aae"] = "sq", ["aiq"] = "fa", ["aln"] = "sq", ["als"] = "sq", ["azb"] = "az", ["azj"] = "az", ["bgn"] = "bal", ["bs"] = "sh", ["bxr"] = "bua", ["ciw"] = "oj", ["cnr"] = "sh", ["fil"] = "tl", ["fuf"] = "ff", ["gem"] = "gem-x-proto", -- Not correct, but is commonly used. ["gmw-ecg"] = "gmw-x-ecg", ["hak"] = "zh", ["hbo"] = "he", ["hr"] = "sh", ["ine"] = "ine-x-proto", -- Not correct, but might be commonly used. ["kjv"] = "sh", ["nan"] = "zh", ["prs"] = "fa", ["rn"] = "rw", ["sli"] = "gmw-x-ecg", ["sr"] = "sh", ["src"] = "sc", ["sro"] = "sc", ["tw"] = "ak", ["wae"] = "gsw", ["wep"] = "nds-de", ["yue"] = "zh", ["xno"] = "fro", -- Incorrect private use tags ["cel-proto"] = "cel-x-proto", ["gem-pro"] = "gem-x-proto", ["grk-pro"] = "grk-x-proto", ["ine-pro"] = "ine-x-proto", ["ine-bsl-pro"] = "ine-x-proto", ["sem-pro"] = "sem-x-proto", ["sla-pro"] = "sla-x-proto", }, } return data
Summary:
Please note that all contributions to Stockhub may be edited, altered, or removed by other contributors. If you do not want your writing to be edited mercilessly, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource (see
Stockhub:Copyrights
for details).
Do not submit copyrighted work without permission!
Cancel
Editing help
(opens in new window)
Template used on this page:
Module:Language/data/sandbox/doc
(
edit
)