Content deleted Content added
examining latn pattern from Module:Lang |
(No difference)
|
Revision as of 19:51, 30 June 2018
Module documentation[view] [edit] [history] [purge]
Lua error in package.lua at line 80: module 'Module:Table' not found.
Lua error in package.lua at line 80: module 'Module:Table' not found.
local p = {}
local Latn_pattern = table.concat {
'[', -- this is a set so include opening bracket
'\n\32-\127', -- C0 Controls and Basic Latin U+0020–U+007E (20 - 7E) + (U+0010 and U+007F <poem>...</poem> support)
'\194\160-\194\172', -- C1 Controls and Latin-1 Supplement U+00A0-U+00AC (C2 A0 - C2 AC)
'\195\128-\195\191', -- (skip shy) U+00C0–U+00FF (C3 80 - C3 BF)
'\196\128-\197\191', -- Latin Extended-A U+0100–U+017F (C4 80 - C5 BF)
'\198\128-\201\143', -- Latin Extended-B U+0180–U+024F (C6 80 - C9 8F)
'\225\184\128-\225\187\191', -- Latin Extended Additional U+1E00-U+1EFF (E1 B8 80 - E1 BB BF)
'\226\177\160-\226\177\191', -- Latin Extended-C U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF)
'\234\156\160-\234\159\191', -- Latin Extended-D U+A720-U+A7FF (EA 9C A0 - EA 9F BF)
'\234\172\176-\234\173\175', -- Latin Extended-E U+AB30-U+AB6F (EA AC B0 - EA AD AF)
'\239\172\128-\239\172\134', -- Alphabetic Presentaion Forms U+FB00-U+FB06 (EF AC 80 - EF AC 86)
'\239\188\129-\239\188\188', -- Halfwidth and Fullwidth Forms U+FF01-U+FF3C (EF BC 81 - EF BC BC)
'–', -- ndash
'—', -- mdash
'«', '»', -- guillemets commonly used in several 'Latn' languages
']', -- close the set
};
local function expand_range(start, ending)
local lower, higher = get_codepoint(start), get_codepoint(ending)
if higher < lower then
return nil
end
local chars = {}
local i = 0
for codepoint = lower, higher do
i = i + 1
chars[i] = mw.ustring.char(codepoint)
end
return table.concat(chars)
end
local fun = require "Module:Fun"
local m_table = require "Module:Table"
local script_to_count_mt = {
__index = function (self, key)
self[key] = 0
return 0
end,
__call = function (self, ...)
return setmetatable({}, self)
end
}
setmetatable(script_to_count_mt, script_to_count_mt)
local function show_scripts(str)
local script_to_count = script_to_count_mt()
for codepoint in mw.ustring.gcodepoint(str) do
local script = Unicode_data.lookup_script(codepoint)
script_to_count[script] = script_to_count[script] + 1
end
return table.concat(
fun.mapIter(
function (count, script)
return ("%s (%d)"):format(script, count)
end,
m_table.sortedPairs(
script_to_count,
function (script1, script2)
return script_to_count[script1] > script_to_count[script2]
end)),
", ")
end
function p.show(frame)
local expanded_pattern = Latn_pattern:gsub('%[(.-)%]', '%1'):gsub(
'([%z\1-\127\194-\244][\128-\191]*)%-([%z\1-\127\194-\244][\128-\191]*)',
function (char1, char2)
local codepoint1, codepoint2 =
mw.ustring.codepoint(char1), mw.ustring.codepoint(char2)
return expand_range(codepoint1, codepoint2)
end)
return ("%s<br>%s"):format(expanded_pattern, show_scripts(expanded_pattern))
end
return p