-- Module:Language/data
-- Documentation for this module may be created at Module:Language/data/doc
-- Shorthand: builds a string from Unicode code points.
local U = mw.ustring.char

-- Combining diacritical marks, used further down to build the character
-- class that strips accent marks in the "sla-pro" replacements.
local grave        = U(0x0300) -- COMBINING GRAVE ACCENT
local acute        = U(0x0301) -- COMBINING ACUTE ACCENT
local double_acute = U(0x030B) -- COMBINING DOUBLE ACUTE ACCENT
local tilde        = U(0x0303) -- COMBINING TILDE
local macron       = U(0x0304) -- COMBINING MACRON
local dgrave       = U(0x030F) -- COMBINING DOUBLE GRAVE ACCENT
local invbreve     = U(0x0311) -- COMBINING INVERTED BREVE
--[[
Language data returned by this module.

Top-level keys:
  languages   maps a language code to a table describing that language.
  redirects   maps an alternative code to the code of an entry in `languages`.

Fields used by the entries below (not every entry carries every field):
  name            the "canonical name" used on Wiktionary.
  article         title of the Wikipedia article, as a plain string.
  scripts         ISO 15924 script code(s); commented out in every entry.
  type            set to "reconstructed" on the Proto-* entries.
  direction       "rtl" on the Arabic entry.
  replacements    maps a pattern string to its replacement string. Most keys
                  are character classes; "se" uses a capture together with a
                  %1 back-reference.
  Wikipedia_code, Wikipedia_name
                  presumably the code / name Wikipedia uses where it differs
                  from the one used here -- TODO confirm against the consumer.

NOTE(review): the pattern keys contain multi-byte characters inside [...]
classes, so they presumably have to be applied with a Unicode-aware gsub
(e.g. mw.ustring.gsub) -- confirm in the consuming module. `replacements` is a
hash table, so if the consumer walks it with pairs() the order in which the
rules are applied is unspecified.
]]
local data = {
    languages = {
        ["ang"] = {
            name = "Old English",
            -- A plain string, as in every other entry.
            article = "Old English",
            -- scripts = {"Latn"},
            -- Remove macrons, acutes, and overdots
            replacements = {
                ["[ĀÁ]"] = "A",
                ["[āá]"] = "a",
                ["[ǢǼ]"] = "Æ",
                ["[ǣǽ]"] = "æ",
                ["Ċ"] = "C",
                ["ċ"] = "c",
                ["[ĒÉ]"] = "E",
                ["[ēé]"] = "e",
                ["Ġ"] = "G",
                ["ġ"] = "g",
                ["[ĪÍ]"] = "I",
                ["[īí]"] = "i",
                ["[ŌÓ]"] = "O",
                ["[ōó]"] = "o",
                ["[ŪÚ]"] = "U",
                ["[ūú]"] = "u",
                ["[ȲÝ]"] = "Y",
                ["[ȳý]"] = "y",
            },
        },
        ["ar"] = {
            name = "Arabic",
            article = "Arabic language",
            -- scripts = { "Arab" },
            direction = "rtl", -- Should be in the script data module.
            -- ālif with wasla is replaced by ālif;
            -- taṭwīl, fatḥatan, ḍammatan, kasratan,
            -- fatḥa, ḍamma, kasra,
            -- shadda, sukūn, and superscript (dagger) ālif are removed.
            replacements = {
                [U(0x0671)] = U(0x0627),
                ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
                    ..U(0x064E)..U(0x064F)..U(0x0650)
                    ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
            },
        },
        ["be"] = {
            article = "Belarusian language",
            -- scripts = { "Cyrl" },
            -- Combining acute accent is removed.
            replacements = { [acute] = "", },
        },
        ["bn"] = {
            name = "Bengali",
            article = "Bengali language",
            -- scripts = { "Beng" },
        },
        ["cu"] = {
            name = "Old Church Slavonic",
            article = "Old Church Slavonic",
            -- scripts = { "Cyrs" },
        },
        ["de"] = {
            name = "German",
            article = "German language",
            -- scripts = { "Latn" },
            -- Disabled: digraph-to-umlaut conversion.
            --[[
            ["replacements"] = {
                ["ae"] = "ä",
                ["oe"] = "ö",
                ["ue"] = "ü",
                ["A[Ee]"] = "Ä",
                ["O[Ee]"] = "Ö",
                ["U[Ee]"] = "Ü",
            },
            ]]
        },
        ["en"] = {
            name = "ⵜⴰⵏⴳⵍⵉⵣⵜ",
            article = "ⵜⵓⵜⵍⴰⵢⵜ ⵜⴰⵏⴳⵍⵉⵣⵜ",
            -- scripts = { "Latn" },
        },
        ["es"] = {
            name = "Spanish",
            article = "Spanish language",
            -- scripts = { "Latn" },
        },
        ["fr"] = {
            name = "French",
            article = "French language",
            -- scripts = { "Latn" },
        },
        ["frm"] = {
            name = "Middle French",
            article = "Middle French",
            -- scripts = { "Latn" },
        },
        ["gem-pro"] = {
            name = "Proto-Germanic",
            article = "Proto-Germanic language",
            -- scripts = { "Latn" },
            type = "reconstructed",
            replacements = {},
            Wikipedia_code = "gem-x-proto",
        },
        ["grc"] = {
            name = "Ancient Greek",
            article = "Ancient Greek",
            -- scripts = { "Grek" },
            replacements = {
                -- Vowels with macrons or breves are replaced with plain letters.
                ["[ᾱᾰ]"] = "α",
                ["[ᾹᾸ]"] = "Α",
                ["[ῑῐ]"] = "ι",
                ["[ῙῘ]"] = "Ι",
                ["[ῡῠ]"] = "υ",
                ["[ῩῨ]"] = "Υ",
                -- Variant letter forms are replaced with the ordinary letters.
                ["ϐ"] = "β",
                ["ϵ"] = "ε",
                ["ϑ"] = "θ",
                ["ϰ"] = "κ",
                ["ϱ"] = "ρ",
                ["ϲ"] = "σ",
                ["ϕ"] = "φ",
            },
        },
        ["got"] = {
            name = "Gothic",
            article = "Gothic language",
            -- scripts = { "Goth" },
            replacements = {
                -- Latin to Gothic since people will not want to have to copy
                -- and paste Gothic letters in
                ["[AÁaáĀā]"] = "𐌰",
                ["[Bb]"] = "𐌱",
                ["[Gg]"] = "𐌲",
                ["[Dd]"] = "𐌳",
                ["[EeĒē]"] = "𐌴",
                ["[Qq]"] = "𐌵",
                ["[Zz]"] = "𐌶",
                ["[Hh]"] = "𐌷",
                ["[Þþ]"] = "𐌸",
                ["[IiÍí]"] = "𐌹",
                ["[Kk]"] = "𐌺",
                ["[Ll]"] = "𐌻",
                ["[Mm]"] = "𐌼",
                ["[Nn]"] = "𐌽",
                ["[Jj]"] = "𐌾",
                ["[UuÚúŪū]"] = "𐌿",
                ["[Pp]"] = "𐍀",
                ["[Rr]"] = "𐍂",
                ["[Ss]"] = "𐍃",
                ["[Tt]"] = "𐍄",
                ["[WwYy]"] = "𐍅",
                ["[Ff]"] = "𐍆",
                ["[Xx]"] = "𐍇",
                ["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
                ["[OoŌō]"] = "𐍉",
            },
        },
        ["grk-pro"] = {
            name = "Proto-Hellenic",
            Wikipedia_name = "Proto-Greek",
            article = "Proto-Greek language",
            -- scripts = { "Latn" },
            type = "reconstructed",
            replacements = {},
        },
        ["hi"] = {
            name = "Hindi",
            article = "Hindi",
            -- scripts = { "Deva" },
        },
        ["ine-pro"] = {
            name = "Proto-Indo-European",
            article = "Proto-Indo-European language",
            -- scripts = { "Latn" },
            type = "reconstructed",
            replacements = {},
            Wikipedia_code = "ine-x-proto",
        },
        ["ja"] = {
            name = "Japanese",
            article = "Japanese language",
            -- scripts = { "Jpan" },
        },
        ["la"] = {
            name = "Latin",
            article = "Latin",
            -- scripts = { "Latn" },
            replacements = {
                -- Vowels with macrons, breves, or diaereses are replaced with plain letters.
                ["[ĀĂ]"] = "A",
                ["[āă]"] = "a",
                -- Ë is included so that the uppercase class mirrors the
                -- lowercase one, as is already the case for I and U.
                ["[ĒĔË]"] = "E",
                ["[ēĕë]"] = "e",
                ["[ĪĬÏ]"] = "I",
                ["[īĭï]"] = "i",
                ["[ŌŎ]"] = "O",
                ["[ōŏ]"] = "o",
                ["[ŪŬÜ]"] = "U",
                ["[ūŭü]"] = "u",
                ["Ȳ"] = "Y",
                ["ȳ"] = "y",
            },
        },
        ["mul"] = {
            name = "Translingual",
            article = "",
            -- scripts = { "" },
        },
        ["orv"] = {
            name = "Old East Slavic",
            article = "Old East Slavic",
            -- scripts = { "Cyrs" },
            replacements = {
                -- U+0484 COMBINING CYRILLIC PALATALIZATION is removed.
                [U(0x484)] = "",
            },
        },
        ["pt"] = {
            name = "Portuguese",
            article = "Portuguese language",
            -- scripts = { "Latn" },
        },
        ["pa"] = {
            name = "Punjabi",
            article = "Punjabi language",
            -- scripts = { "Guru", "Arab", },
        },
        ["ru"] = {
            name = "Russian",
            article = "Russian language",
            -- scripts = { "Cyrl" },
            -- Combining acute accent is removed.
            replacements = { [acute] = "", },
        },
        ["se"] = {
            replacements = {
                -- A consonant, an apostrophe and the same consonant again
                -- collapse to the doubled consonant (the apostrophe is dropped).
                ["([đflmnŋrsšŧv])'%1"] = "%1%1",
            },
        },
        ["sh"] = {
            article = "Serbo-Croatian language",
            -- scripts = { "Latn", "Cyrl" },
            replacements = {
                -- Latin letters carrying accent marks are replaced with plain letters.
                ["[ȀÀȂÁĀÃ]"] = "A",
                ["[ȁàȃáāã]"] = "a",
                ["[ȄÈȆÉĒẼ]"] = "E",
                ["[ȅèȇéēẽ]"] = "e",
                ["[ȈÌȊÍĪĨ]"] = "I",
                ["[ȉìȋíīĩ]"] = "i",
                ["[ȌÒȎÓŌÕ]"] = "O",
                ["[ȍòȏóōõ]"] = "o",
                ["[ȐȒŔ]"] = "R",
                ["[ȑȓŕ]"] = "r",
                ["[ȔÙȖÚŪŨ]"] = "U",
                ["[ȕùȗúūũ]"] = "u",
                -- Same for the accented Cyrillic letters.
                ["Ѐ"] = "Е",
                ["ѐ"] = "е",
                ["[ӢЍ]"] = "И",
                ["[ӣѝ]"] = "и",
                ["[Ӯ]"] = "У",
                ["[ӯ]"] = "у",
            },
        },
        ["sla-pro"] = {
            name = "Proto-Slavic", -- also Common Slavic
            type = "reconstructed",
            -- scripts = { "Latn" },
            replacements = {
                -- Precomposed accented vowels are replaced with plain letters.
                ["[ÀÁÃĀȀȂ]"] = "A",
                ["[àáãāȁȃ]"] = "a",
                ["[ÈÉẼĒȄȆ]"] = "E",
                ["[èéẽēȅȇ]"] = "e",
                ["[ÌÍĨĪȈȊ]"] = "I",
                ["[ìíĩīȉȋ]"] = "i",
                ["[ÒÓÕŌȌȎŐ]"] = "O",
                ["[òóõōȍȏő]"] = "o",
                ["[ÙÚŨŪȔȖŰ]"] = "U",
                ["[ùúũūȕȗű]"] = "u",
                ["[ỲÝỸȲ]"] = "Y",
                ["[ỳýỹȳ]"] = "y",
                -- The macron is dropped from o-with-ogonek; the ogonek stays.
                ["Ǭ"] = "Ǫ",
                ["ǭ"] = "ǫ",
                -- Any remaining combining accent marks are removed.
                ["[" .. grave .. acute .. double_acute .. tilde .. macron .. dgrave .. invbreve .. "]"] = "",
            },
        },
        ["uk"] = {
            article = "Ukrainian language",
            -- scripts = { "Cyrl" },
            -- Combining acute accent is removed.
            replacements = { [acute] = "", },
        },
        ["ur"] = {
            name = "Urdu",
            article = "Urdu",
            -- scripts = { "Arab" },
        },
        ["zh"] = {
            name = "Chinese",
            article = "Chinese language",
            -- scripts = { "Hani" },
        },
        ["xcl"] = {
            name = "Old Armenian",
            article = "Classical Armenian",
            -- scripts = { "Armn" },
            replacements = {
                -- Armenian punctuation marks are removed.
                ["[՞՜՛՟]"] = "",
                -- The ligature is expanded into its two letters.
                ["և"] = "եւ",
            },
        },
        ["xvn"] = {
            name = "Vandalic",
            article = "Vandalic language",
            -- scripts = { "Latn" },
        },
        -- Templates for new entries (without and with replacements).
        --[[
        [""] = {
            ["name"] = "",
            ["article"] = "",
            -- ["scripts"] = { "" },
        },
        [""] = {
            ["name"] = "",
            ["article"] = "",
            -- ["scripts"] = { "" },
            ["replacements"] = {
            },
        },
        ]]
    },
    redirects = {
        ["gem"] = "gem-pro", -- Not correct, but is commonly used.
        ["gem-x-proto"] = "gem-pro",
        ["ine"] = "ine-pro", -- Not correct, but might be commonly used.
        ["ine-x-proto"] = "ine-pro",
    },
}

return data