Modul:aksara
Pidangan
Dokuméntasi pikeun modul ieu tiasa dijieun dina kaca Modul:aksara/doc.
-- Module table: every public field and function below is attached to it,
-- and it is returned at the end of the file.
local M = {}
--- Test whether a code point lies inside any of a list of inclusive ranges.
-- @param cp      numeric code point to look up
-- @param ranges  sequence of {first, last} pairs (both ends inclusive)
-- @return true as soon as one pair contains cp, false when none does
local function in_ranges(cp, ranges)
    for idx = 1, #ranges do
        local lo, hi = ranges[idx][1], ranges[idx][2]
        if lo <= cp and cp <= hi then
            return true
        end
    end
    return false
end
-- Code point ranges of combining marks. These carry no script identity of
-- their own, so script detection skips them.
local IGNORE_COMBINING = {
    {0x0300, 0x036F}, -- Combining Diacritical Marks
    {0x1AB0, 0x1AFF}, -- Combining Diacritical Marks Extended
    {0x1DC0, 0x1DFF}, -- Combining Diacritical Marks Supplement
    {0x20D0, 0x20FF}, -- Combining Diacritical Marks for Symbols
    {0xFE20, 0xFE2F}, -- Combining Half Marks
}
--- Report whether a code point is a combining mark that detection skips.
-- @param cp  numeric code point
-- @return true when cp falls in one of the IGNORE_COMBINING ranges
local function is_ignorable(cp)
    local skip = in_ranges(cp, IGNORE_COMBINING)
    return skip
end
-- Script data, keyed by four-letter script code. Each entry holds:
--   name   : English display name of the script
--   class  : CSS class used when wrapping text in that script
--   ranges : list of inclusive {first, last} code point ranges
-- Some ranges deliberately overlap between entries (the Coptic letters
-- 0x03E2-0x03EF sit inside the Greek range); M.detect tries the codes in
-- sorted order and returns the first match, so "Copt" wins over "Grek" there.
M.scripts = {
    Arab = { name = "Arabic", class = "Arab", ranges = { {0x0600,0x06FF},{0x0750,0x077F},{0x08A0,0x08FF},{0xFB50,0xFDFF},{0xFE70,0xFEFF},{0x1EE00,0x1EEFF} } },
    Armn = { name = "Armenian", class = "Armn", ranges = { {0x0530,0x058F},{0xFB13,0xFB17} } },
    Bali = { name = "Balinese", class = "Bali", ranges = { {0x1B00,0x1B7F} } },
    Batk = { name = "Batak", class = "Batk", ranges = { {0x1BC0,0x1BFF} } },
    Beng = { name = "Bengali", class = "Beng", ranges = { {0x0980,0x09FF} } },
    Bopo = { name = "Bopomofo", class = "Bopo", ranges = { {0x3100,0x312F},{0x31A0,0x31BF} } },
    Brai = { name = "Braille", class = "Brai", ranges = { {0x2800,0x28FF} } },
    Bugi = { name = "Buginese", class = "Bugi", ranges = { {0x1A00,0x1A1F} } },
    Buhd = { name = "Buhid", class = "Buhd", ranges = { {0x1740,0x175F} } },
    Cham = { name = "Cham", class = "Cham", ranges = { {0xAA00,0xAA5F} } },
    Copt = { name = "Coptic", class = "Copt", ranges = { {0x2C80,0x2CFF},{0x03E2,0x03EF} } },
    Cyrl = { name = "Cyrillic", class = "Cyrl", ranges = { {0x0400,0x04FF},{0x0500,0x052F},{0x2DE0,0x2DFF},{0xA640,0xA69F},{0x1C80,0x1C8F} } },
    Deva = { name = "Devanagari", class = "Deva", ranges = { {0x0900,0x097F},{0xA8E0,0xA8FF} } },
    Dsrt = { name = "Deseret", class = "Dsrt", ranges = { {0x10400,0x1044F} } },
    Egyp = { name = "Egyptian Hieroglyphs", class = "Egyp", ranges = { {0x13000,0x1342F} } },
    Ethi = { name = "Ethiopic", class = "Ethi", ranges = { {0x1200,0x137F},{0x1380,0x139F},{0x2D80,0x2DDF},{0xAB00,0xAB2F} } },
    Geor = { name = "Georgian", class = "Geor", ranges = { {0x10A0,0x10FF},{0x2D00,0x2D2F},{0x1C90,0x1CBF} } },
    Glag = { name = "Glagolitic", class = "Glag", ranges = { {0x2C00,0x2C5F},{0x1E000,0x1E02F} } },
    Goth = { name = "Gothic", class = "Goth", ranges = { {0x10330,0x1034F} } },
    Grek = { name = "Greek", class = "Grek", ranges = { {0x0370,0x03FF},{0x1F00,0x1FFF} } },
    Gujr = { name = "Gujarati", class = "Gujr", ranges = { {0x0A80,0x0AFF} } },
    Guru = { name = "Gurmukhi", class = "Guru", ranges = { {0x0A00,0x0A7F} } },
    Hang = { name = "Hangul", class = "Hang", ranges = { {0x1100,0x11FF},{0x3130,0x318F},{0xAC00,0xD7AF},{0xA960,0xA97F},{0xD7B0,0xD7FF} } },
    Hani = { name = "Han", class = "Hani", ranges = { {0x4E00,0x9FFF},{0x3400,0x4DBF} } },
    Hano = { name = "Hanunoo", class = "Hano", ranges = { {0x1720,0x173F} } },
    Hebr = { name = "Hebrew", class = "Hebr", ranges = { {0x0590,0x05FF},{0xFB1D,0xFB4F} } },
    Hira = { name = "Hiragana", class = "Hira", ranges = { {0x3040,0x309F} } },
    Ital = { name = "Old Italic", class = "Ital", ranges = { {0x10300,0x1032F} } },
    Java = { name = "Javanese", class = "Java", ranges = { {0xA980,0xA9DF} } },
    Kana = { name = "Katakana", class = "Kana", ranges = { {0x30A0,0x30FF},{0x31F0,0x31FF},{0xFF66,0xFF9D} } },
    Khmr = { name = "Khmer", class = "Khmr", ranges = { {0x1780,0x17FF},{0x19E0,0x19FF} } },
    Knda = { name = "Kannada", class = "Knda", ranges = { {0x0C80,0x0CFF} } },
    Lana = { name = "Tai Tham (Lanna)", class = "Lana", ranges = { {0x1A20,0x1AAF} } },
    Laoo = { name = "Lao", class = "Laoo", ranges = { {0x0E80,0x0EFF} } },
    Latn = { name = "Latin", class = "Latn", ranges = { {0x0041,0x005A},{0x0061,0x007A},{0x00C0,0x00FF},{0x0100,0x017F},{0x0180,0x024F},{0x0250,0x02AF},{0x02B0,0x02FF},{0x1D00,0x1D7F},{0x1D80,0x1DBF},{0x1E00,0x1EFF},{0x2C60,0x2C7F},{0xA720,0xA7FF},{0xAB30,0xAB6F} } },
    Limb = { name = "Limbu", class = "Limb", ranges = { {0x1900,0x194F} } },
    Linb = { name = "Linear B", class = "Linb", ranges = { {0x10000,0x1007F},{0x10080,0x100FF} } },
    Maka = { name = "Makasar", class = "Maka", ranges = { {0x11EE0,0x11EFF} } },
    Mlym = { name = "Malayalam", class = "Mlym", ranges = { {0x0D00,0x0D7F} } },
    -- 0x18B0-0x18FF is Unified Canadian Aboriginal Syllabics Extended, not
    -- Mongolian, so it is intentionally not listed here.
    Mong = { name = "Mongolian", class = "Mong", ranges = { {0x1800,0x18AF},{0x11660,0x1167F} } },
    Mymr = { name = "Myanmar", class = "Mymr", ranges = { {0x1000,0x109F},{0xAA60,0xAA7F},{0xA9E0,0xA9FF} } },
    Nkoo = { name = "N’Ko", class = "Nkoo", ranges = { {0x07C0,0x07FF} } },
    Ogam = { name = "Ogham", class = "Ogam", ranges = { {0x1680,0x169F} } },
    Orya = { name = "Oriya", class = "Orya", ranges = { {0x0B00,0x0B7F} } },
    Phnx = { name = "Phoenician", class = "Phnx", ranges = { {0x10900,0x1091F} } },
    Rjng = { name = "Rejang", class = "Rjng", ranges = { {0xA930,0xA95F} } },
    Runr = { name = "Runic", class = "Runr", ranges = { {0x16A0,0x16FF} } },
    Sinh = { name = "Sinhala", class = "Sinh", ranges = { {0x0D80,0x0DFF} } },
    Sund = { name = "Sundanese", class = "Sund", ranges = { {0x1B80,0x1BBF},{0x1CC0,0x1CCF} } },
    Syrc = { name = "Syriac", class = "Syrc", ranges = { {0x0700,0x074F},{0x0860,0x086F} } },
    Tagb = { name = "Tagbanwa", class = "Tagb", ranges = { {0x1760,0x177F} } },
    Tale = { name = "Tai Le", class = "Tale", ranges = { {0x1950,0x197F} } },
    Talu = { name = "New Tai Lue", class = "Talu", ranges = { {0x1980,0x19DF} } },
    Taml = { name = "Tamil", class = "Taml", ranges = { {0x0B80,0x0BFF} } },
    Tavt = { name = "Tai Viet", class = "Tavt", ranges = { {0xAA80,0xAADF} } },
    Telu = { name = "Telugu", class = "Telu", ranges = { {0x0C00,0x0C7F} } },
    Thaa = { name = "Thaana", class = "Thaa", ranges = { {0x0780,0x07BF} } },
    Thai = { name = "Thai", class = "Thai", ranges = { {0x0E00,0x0E7F} } },
    Tfng = { name = "Tifinagh", class = "Tfng", ranges = { {0x2D30,0x2D7F} } },
    -- The Tagalog block ends at 0x171F; 0x1720-0x173F belongs to Hanunoo
    -- (see Hano above), so no second range is listed here.
    Tglg = { name = "Tagalog", class = "Tglg", ranges = { {0x1700,0x171F} } },
    Tibt = { name = "Tibetan", class = "Tibt", ranges = { {0x0F00,0x0FFF} } },
    Ugar = { name = "Ugaritic", class = "Ugar", ranges = { {0x10380,0x1039F} } },
    Vaii = { name = "Vai", class = "Vaii", ranges = { {0xA500,0xA63F} } },
    Xpeo = { name = "Old Persian", class = "Xpeo", ranges = { {0x103A0,0x103DF} } },
    Xsux = { name = "Cuneiform", class = "Xsux", ranges = { {0x12000,0x123FF},{0x12400,0x1247F},{0x12480,0x1254F} } },
}
-- Script codes in alphabetical order. pairs() visits keys in no defined
-- order, so the list is sorted to keep the detection sequence deterministic.
M.order = {}
for code in pairs(M.scripts) do
    M.order[#M.order + 1] = code
end
table.sort(M.order)
--- Detect the script of a piece of text.
-- Scans the text one code point at a time, skipping combining marks, and
-- returns the code of the first script (tried in M.order sequence) whose
-- ranges contain the code point. Code points that belong to no listed script
-- (digits, spaces, punctuation, ...) are passed over.
-- @param text  string to inspect
-- @return a key of M.scripts; "Latn" when text is nil, empty, or contains no
--         code point from any listed script
function M.detect(text)
    if not text or text == "" then
        return "Latn"
    end
    -- Iterate lazily rather than unpacking every code point into a table:
    -- the scan usually ends at the first letter, and returning one value per
    -- code point can run into Lua's result limits on long input.
    for cp in mw.ustring.gcodepoint(text) do
        if not is_ignorable(cp) then
            for j = 1, #M.order do
                local code = M.order[j]
                if in_ranges(cp, M.scripts[code].ranges) then
                    return code
                end
            end
        end
    end
    return "Latn"
end
--- Look up the CSS class registered for a script code.
-- @param code  script code (a key of M.scripts)
-- @return the entry's class field, or nil when code is missing or unknown
function M.getClass(code)
    local entry = code and M.scripts[code]
    if not entry then
        return nil
    end
    return entry.class
end
--- Wrap text in a <span> carrying the script's CSS class.
-- The text is returned unchanged when either argument is missing, when the
-- script is "Latn", or when no class is known for the script.
-- @param text  text to wrap
-- @param sc    script code (a key of M.scripts)
-- @return the wrapped markup, or text itself when no wrapping applies
function M.wrap(text, sc)
    -- Nothing to wrap without both pieces, and Latin is never wrapped.
    if not (text and sc) or sc == "Latn" then
        return text
    end
    local css = M.getClass(sc)
    if not css then
        return text -- unknown script: leave the text as it is
    end
    return string.format('<span class="%s">%s</span>', css, text)
end
--- Template entry point: detect the script of the "text" argument and wrap
-- it in a class-bearing <span> when the script is not Latin.
-- @param frame  frame object; its args table is read for the "text" key
-- @return the (possibly wrapped) text, or "" when the argument is absent or empty
function M.detectWrap(frame)
    local text = frame.args.text
    if text and text ~= "" then
        return M.wrap(text, M.detect(text))
    end
    return ""
end
return M