Open main menu
Home
Random
Donate
Recent changes
Special pages
Community portal
Preferences
About Stockhub
Disclaimers
Search
User menu
Talk
Contributions
Create account
Log in
Editing
Module:Sandbox/Erutuon/Unicode
Warning:
You are not logged in. Your IP address will be publicly visible if you make any edits. If you
log in
or
create an account
, your edits will be attributed to your username, along with other benefits.
Anti-spam check. Do
not
fill this in!
-- Sandbox helpers for inspecting Unicode script and block data.
-- Exported entry points (all take a Scribunto frame):
--   p.show, p.scripts_in_block, p.scripts_in_blocks,
--   p.chars_in_scripts_in_block, p.search_for_language_codes
local p = {}

local Unicode_data = require "Module:Unicode data/sandbox"

--- Raise a formatted error.
-- Called either as errorf(level, fmt, ...) or as errorf(fmt, ...).
-- In the second form the error is attributed to the caller of the function
-- that called errorf (level 2).
local function errorf(level, ...)
	if type(level) == "number" then
		return error(string.format(...), level + 1)
	else -- level is actually the format string.
		return error(string.format(level, ...), 2)
	end
end

--- Log a formatted message with mw.log.
-- Note that this adds a field to the global `mw` table.
function mw.logf(...)
	return mw.log(string.format(...))
end

-- Minimal string buffer: strings are stored at indices 1..n and joined with
-- table.concat.
local output_mt = {}

--- Append one string to the buffer.
function output_mt:insert(str)
	self.n = self.n + 1
	self[self.n] = str
end

-- also in [[Module:Unicode data/documentation functions]]
--- Append string.format(...) to the buffer.
function output_mt:insert_format(...)
	self:insert(string.format(...))
end

output_mt.join = table.concat

output_mt.__index = output_mt

--- Create an empty output buffer.
local function Output()
	return setmetatable({ n = 0 }, output_mt)
end

-- Character-class-style pattern; p.show strips the brackets and expands each
-- "X-Y" range into the individual characters.
-- NOTE(review): the four literals before "]" look mis-encoded (mojibake);
-- they are kept byte-for-byte here. Confirm the intended characters against
-- the page history before relying on them.
local Latn_pattern = table.concat {
	"[",
	"\n\32-\127",
	"\194\160-\194\172",
	"\195\128-\195\191",
	"\196\128-\197\191",
	"\198\128-\201\143",
	"\225\184\128-\225\187\191",
	"\226\177\160-\226\177\191",
	"\234\156\160-\234\159\191",
	"\234\172\176-\234\173\175",
	"\239\172\128-\239\172\134",
	"\239\188\129-\239\188\188",
	"β",
	"β",
	"Β«",
	"Β»",
	"]",
};

local get_codepoint = mw.ustring.codepoint

--- Expand a character range into a string of every character in it.
-- @param start  first character of the range
-- @param ending last character of the range
-- @return the concatenated characters, or nil when `ending` sorts before
--   `start`
local function expand_range(start, ending)
	local lower, higher = get_codepoint(start), get_codepoint(ending)
	if higher < lower then
		return nil
	end
	local chars = {}
	local i = 0
	for codepoint = lower, higher do
		i = i + 1
		chars[i] = mw.ustring.char(codepoint)
	end
	return table.concat(chars)
end

local fun = require "Module:Fun"
local m_table = require "Module:TableTools"

-- Metatable for counter tables: a missing key is initialised to 0 on first
-- read. The table is its own metatable so that calling it creates a new
-- counter table.
local script_to_count_mt = {
	__index = function (self, key)
		self[key] = 0
		return 0
	end,
	__call = function (self, ...)
		return setmetatable({}, self)
	end
}
setmetatable(script_to_count_mt, script_to_count_mt)

-- Uses an iterator (such as mw.ustring.gcodepoint) that generates a codepoint
-- each time it is called with an optional state and another value.
--- Count the code points per script and format the counts.
-- @return a string such as "Latn (10), Zyyy (3)", highest count first
local function show_scripts(iterator, state, value)
	local script_to_count = script_to_count_mt()
	for codepoint in iterator, state, value do
		local script = Unicode_data.lookup_script(codepoint)
		script_to_count[script] = script_to_count[script] + 1
	end
	
	return table.concat(
		fun.mapIter(
			function (count, script)
				return ("%s (%d)"):format(script, count)
			end,
			m_table.sortedPairs(
				script_to_count,
				function (script1, script2)
					return script_to_count[script1] > script_to_count[script2]
				end)),
		", ")
end

--- Group the code points produced by an iterator by script.
-- @return a table mapping each script to a set ({ [codepoint] = true })
local function get_chars_in_scripts(iterator, state, value)
	local script_to_char_set = {}
	for codepoint in iterator, state, value do
		local script = Unicode_data.lookup_script(codepoint)
		script_to_char_set[script] = script_to_char_set[script] or {}
		script_to_char_set[script][codepoint] = true
	end
	return script_to_char_set
end

--- Format a script-to-character-set map as text.
-- @param script_to_char_set map as returned by get_chars_in_scripts
-- @param format    format string taking (script, characters);
--   defaults to "%s: %s"
-- @param separator string placed between entries; defaults to "\n"
local function print_char_set_map(script_to_char_set, format, separator)
	format = format or "%s: %s"
	separator = separator or "\n"
	return table.concat(
		fun.mapIter(
			function (char_set, script)
				local char_list = fun.mapIter(
					function (_, codepoint)
						return mw.ustring.char(codepoint)
					end,
					m_table.sortedPairs(char_set))
				return (format):format(script, mw.text.nowiki(table.concat(char_list)))
			end,
			m_table.sortedPairs(script_to_char_set)),
		separator)
end

--- Show the expanded Latn_pattern followed by its per-script counts.
function p.show(frame)
	local expanded_pattern = Latn_pattern
		:gsub("%[(.-)%]", "%1")
		:gsub(
			-- Find two UTF-8-encoded characters separated by hyphen-minus.
			"([%z\1-\127\194-\244][\128-\191]*)%-([%z\1-\127\194-\244][\128-\191]*)",
			function (char1, char2)
				return expand_range(char1, char2)
			end)
	
	-- Remove initial "\n " to avoid creating unwanted pre element.
	-- The parentheses discard gsub's second return value (the match count).
	local displayed_pattern = (expanded_pattern:gsub("^%s*", ""))
	
	return ('* <div style="overflow-wrap: break-word;">%s</div><br>%s')
		:format(
			displayed_pattern,
			show_scripts(mw.ustring.gcodepoint(expanded_pattern)))
end

--- Fetch block info for the block named in args[arg].
-- Raises an error when the parameter is missing or the block is unknown.
-- @param args frame arguments
-- @param arg  key of the parameter that holds the block name
local function get_block_info_from_arg(args, arg)
	local block_name = args[arg]
		or errorf("Parameter %s is required", tostring(arg))
	local block_info = Unicode_data.get_block_info(block_name)
		or errorf("The block '%s' could not be found", block_name)
	return block_info
end

--- Interpret args[arg] with Module:Yesno; yields nil when the parameter is
-- absent.
local function get_boolean_from_arg(args, arg)
	return args[arg] and require "Module:Yesno" (args[arg])
end

--- List the scripts found in one Unicode block.
-- Parameter 1: block name. Parameter 2: whether to prefix the block name.
function p.scripts_in_block(frame)
	local block_info = get_block_info_from_arg(frame.args, 1)
	local show_block_name = get_boolean_from_arg(frame.args, 2)
	local script_list = show_scripts(fun.range(block_info[1], block_info[2]))
	if show_block_name then
		return ("%s: %s"):format(block_info[3], script_list)
	else
		return script_list
	end
end

--- Build a wikilink for a block name. Names without a space are linked to
-- "<name> (Unicode block)" and displayed as the bare name.
local function link_block_name(block_name)
	if block_name:find " " then
		return ("[[%s]]"):format(block_name)
	else
		return ("[[%s (Unicode block)|%s]]"):format(block_name, block_name)
	end
end

--- Produce a wikitable listing, for each Unicode block, the scripts in it.
-- Parameters 1 and 2 are hexadecimal code points bounding the block start
-- (defaults 0 and 0x4000).
function p.scripts_in_blocks(frame)
	local output = Output()
	
	local start = frame.args[1] and tonumber(frame.args[1], 16) or 0
	local ending = frame.args[2] and tonumber(frame.args[2], 16) or 0x4000
	
	local script_data = mw.loadData "Module:Unicode data/scripts"
	local singles = script_data.singles
	local ranges = script_data.ranges
	
	-- Remove every key stored directly in the table. The key list is built
	-- first so the table is not modified while it is being traversed.
	local function clear (self)
		for _, key in ipairs(m_table.keysToList(self, false)) do
			self[key] = nil
		end
	end
	
	-- Per-block tally: script code -> number of code points.
	local counts = {}
	setmetatable(counts, {
		__index = {
			increment = function(self, script_code, amount)
				self[script_code] = (self[script_code] or 0) + (amount or 1)
			end,
			clear = clear,
		}
	})
	
	-- Per-block sample: script code -> list of code points (with count n)
	-- shown in the tooltip.
	local codepoints_per_script = {}
	setmetatable(codepoints_per_script, {
		__index = {
			add = function(self, script_code, codepoint)
				local list = self[script_code] or { n = 0 }
				self[script_code] = list
				
				-- Code points U+0000-U+001F and U+0080-U+009F are skipped.
				local is_skipped = codepoint <= 0x9F
					and (codepoint >= 0x80 or codepoint <= 0x1F)
				
				if list.n <= 0x20 and not is_skipped then
					if list.n == 0x20 then
						-- Sample is full: close it with "..." (which also
						-- pushes n past the limit).
						local period = ("."):byte()
						for _ = 1, 3 do
							list.n = list.n + 1
							list[list.n] = period
						end
					else
						if script_code == "Zinh" then -- probably combining character
							-- Precede it with U+25CC.
							list.n = list.n + 1
							list[list.n] = 0x25CC
						end
						list.n = list.n + 1
						list[list.n] = codepoint
					end
				end
			end,
			clear = clear,
		}
	})
	
	-- Wikitable markup must begin at the start of a line, so the line breaks
	-- inside these long strings are significant.
	output:insert [[
{| class="wikitable"
|+ Scripts in each Unicode block
! block !! codepoints !! scripts
]]
	
	-- ipairs rather than pairs: the early `break` below relies on the blocks
	-- being visited in ascending order, which pairs does not guarantee.
	for _, block in ipairs(mw.loadData "Module:Unicode data/blocks") do
		local codepoint = block[1]
		if codepoint > ending then
			break
		end
		if codepoint >= start then
			while codepoint <= block[2] do
				local script = singles[codepoint]
				local count
				if script then
					-- Codepoint is in "singles" map.
					counts:increment(script)
					codepoints_per_script:add(script, codepoint)
					codepoint = codepoint + 1
					count = 1 -- for potential future use
				else
					local range, index = Unicode_data.binary_range_search(codepoint, ranges)
					if range then
						-- Codepoint is in "ranges" array.
						count = 0
						script = range[3]
						while codepoint <= range[2] and codepoint <= block[2] do
							count = count + 1
							codepoints_per_script:add(script, codepoint)
							codepoint = codepoint + 1
						end
						counts:increment(script, count)
					else
						-- Codepoint doesn't have data; it's Zzzz.
						-- Get range immediately above codepoint.
						-- NOTE(review): assumes binary_range_search also
						-- returns an index into `ranges` on failure - confirm.
						while ranges[index] and ranges[index][2] < codepoint do
							index = index + 1
						end
						count = 0
						script = "Zzzz"
						-- When codepoint lies above the last range there is
						-- no upper neighbour; only the block end limits the
						-- run.
						local next_range = ranges[index]
						local next_start = next_range and next_range[1] or math.huge
						while codepoint < next_start and codepoint <= block[2]
								and not singles[codepoint] do
							count = count + 1
							codepoint = codepoint + 1
						end
						counts:increment(script, count)
					end
				end
			end
			
			output:insert_format([[
|-
| %s
| U+%04X–U+%04X
| %s
]],
				link_block_name(block[3]),
				block[1], block[2],
				table.concat(
					-- mapIter, as at the other sortedPairs call sites:
					-- sortedPairs yields an iterator, not a table.
					fun.mapIter(
						function (count, script)
							return ('<abbr title="%s">%s</abbr> (<span title="%s">%d</span>)')
								:format(
									-- Fall back to the code itself when no
									-- alias is listed.
									script_data.aliases[script] or script,
									script,
									codepoints_per_script[script]
										and mw.text.nowiki(mw.ustring.char(
											unpack(codepoints_per_script[script])))
										or "",
									count)
						end,
						m_table.sortedPairs(
							counts,
							function (script1, script2)
								return counts[script1] > counts[script2]
							end)),
					", "))
		end
		-- mw.logObject(codepoints_per_script, block[3])
		counts:clear()
		codepoints_per_script:clear()
	end
	
	output:insert "|}"
	
	return output:join()
end

--- List the characters of one Unicode block, grouped by script.
-- Parameter 1: block name. Parameter 2: whether to prefix the block name.
function p.chars_in_scripts_in_block(frame)
	local block_info = get_block_info_from_arg(frame.args, 1)
	local show_block_name = get_boolean_from_arg(frame.args, 2)
	local script_char_set_map = print_char_set_map(
		get_chars_in_scripts(fun.range(block_info[1], block_info[2])))
	if show_block_name then
		return ("%s: %s"):format(block_info[3], script_char_set_map)
	else
		return script_char_set_map
	end
end

--- Collect the language codes used in {{lang|...}} and {{lang-...}} templates
-- on a page.
-- Parameter 1: page name (default "English language").
-- @return comma-separated language codes, or nothing (after logging) when the
--   page cannot be read
function p.search_for_language_codes(frame)
	local page_name = frame.args[1] or "English language"
	local success, title_object = pcall(mw.title.new, page_name)
	if not (success and title_object) then
		mw.logf("Could not make title object for '%s'.", page_name)
		return
	end
	
	local content = title_object:getContent()
	if not content then
		-- No content to scan.
		mw.logf("Could not get content of '%s'.", page_name)
		return
	end
	
	local language_codes = {}
	for lang_template in content:gmatch "{{lang[^}]+" do
		local template_name = lang_template:match("{{([^|}]+)")
		local language_code
		if template_name == "lang" then
			language_code = lang_template:match "{{lang|([^|}]+)"
		elseif template_name:find "^lang-" then
			language_code = lang_template:match "{{lang-([^|}]+)"
		end
		if language_code then
			language_codes[language_code] = true
		end
	end
	return table.concat(m_table.keysToList(language_codes), ", ")
end

return p
Summary:
Please note that all contributions to Stockhub may be edited, altered, or removed by other contributors. If you do not want your writing to be edited mercilessly, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource (see
Stockhub:Copyrights
for details).
Do not submit copyrighted work without permission!
Cancel
Editing help
(opens in new window)
Templates used on this page:
Module:Exponential search
(
edit
)
Module:Fun
(
edit
)
Module:Lang/data
(
edit
)
Module:Language/data/iana languages
(
edit
)
Module:Language/data/iana regions
(
edit
)
Module:Language/data/iana scripts
(
edit
)
Module:Language/data/iana suppressed scripts
(
edit
)
Module:Language/data/iana variants
(
edit
)
Module:Sandbox/Erutuon/Unicode
(
edit
)
Module:Sandbox/Erutuon/Unicode/doc
(
edit
)
Module:TableTools
(
edit
)
Module:Unicode data/sandbox
(
edit
)
Module:Unicode data/scripts
(
edit
)