Language templates |
---|
Language names (ISO 639) |
|
Interwiki links |
|
Foreign-language text |
|
Other |
|
The `redirects` table in Module:Language/data connects Wikipedia language codes to the corresponding code used on the English Wiktionary.
wikt:Wiktionary:Language treatment records this relationship for ISO codes. For instance, `bs` (Bosnian), `hr` (Croatian), `sr` (Serbian), `cnr` (Montenegrin), and `kjv` (Kajkavian) are all placed under the header for `sh` (Serbo-Croatian) in Wiktionary entries. See, for instance, wikt:kaj#Serbo-Croatian, the word that Kajkavian is named after. The subsumed codes should still be used in language-tagging on Wikipedia.
-- Shorthand for turning a Unicode code point into a string.
local U = mw.ustring.char

-- Combining marks used by the replacement rules below. All of them come from
-- the [[Combining Diacritical Marks]] block (U+0300..U+036F).
local grave        = U(0x0300) -- COMBINING GRAVE ACCENT
local acute        = U(0x0301) -- COMBINING ACUTE ACCENT
local circumflex   = U(0x0302) -- COMBINING CIRCUMFLEX ACCENT
local tilde        = U(0x0303) -- COMBINING TILDE
local macron       = U(0x0304) -- COMBINING MACRON
local breve        = U(0x0306) -- COMBINING BREVE
local dot          = U(0x0307) -- COMBINING DOT ABOVE
local diaeresis    = U(0x0308) -- COMBINING DIAERESIS
local double_acute = U(0x030B) -- COMBINING DOUBLE ACUTE ACCENT
local caron        = U(0x030C) -- COMBINING CARON
local double_grave = U(0x030F) -- COMBINING DOUBLE GRAVE ACCENT
local invbreve     = U(0x0311) -- COMBINING INVERTED BREVE
local dot_below    = U(0x0323) -- COMBINING DOT BELOW
local undertie     = U(0x035C) -- COMBINING DOUBLE BREVE BELOW
--[[
This module returns a table with two sub-tables:

  languages  Wiktionary language codes mapped to data belonging to them.
  redirects  Wikipedia language codes mapped to the equivalent Wiktionary code.

Fields that appear on entries of "languages":

  name            The "canonical name" used on Wiktionary.
  article         The Wikipedia article (a string; "" for Translingual).
  direction       Text direction; only "rtl" is used here.
  type            "reconstructed" or "appendix".
  Wikipedia_code  The tag to use on Wikipedia when the Wiktionary code is
                  marked below as an incorrect tag.
  Wikipedia_name  The name used on Wikipedia when it differs from "name".
  replacements    Rules applied to the text when building an entry name.
                  Two shapes occur:
                    * a map of pattern -> replacement string;
                    * { decompose = true, from = {...}, to = {...} }, where
                      from[i] is replaced by to[i].
                  NOTE(review): presumably a "from" pattern without a matching
                  "to" entry is deleted, and "decompose" requests Unicode
                  decomposition first -- confirm against the consuming module.

A "scripts" field (ISO 15924 codes) is only present in commented-out form.
]]

-- Builds the replacement rules shared by every Arabic variety below.
-- A fresh table is returned on each call so that no two language entries
-- alias the same table.
local function arabic_replacements()
	return {
		-- ālif with wasla is replaced by ālif;
		[U(0x0671)] = U(0x0627),
		-- taṭwīl, fatḥatan, ḍammatan, kasratan,
		-- fatḥa, ḍamma, kasra,
		-- shadda, sukūn, and superscript (dagger) ālif are removed.
		["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
			.. U(0x064E) .. U(0x064F) .. U(0x0650)
			.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
	}
end

local data = {
	["languages"] = {
		["aaq"] = {
			["name"] = "Penobscot",
		},
		["ab"] = {
			["name"] = "Abkhaz",
		},
		["abe"] = {
			["name"] = "Abenaki",
		},
		["ang"] = {
			["name"] = "Old English",
			-- A plain string, like the "article" field of every other entry.
			["article"] = "Old English",
			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. acute .. dot .. "]" },
			},
		},
		["ar"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["ara"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["arb"] = {
			["name"] = "Modern Standard Arabic",
			["article"] = "Modern Standard Arabic",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["apc"] = {
			["name"] = "North Levantine Arabic",
			["article"] = "North Levantine Arabic",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["ajp"] = {
			["name"] = "South Levantine Arabic",
			["article"] = "South Levantine Arabic",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["arz"] = {
			["name"] = "Egyptian Arabic",
			["article"] = "Egyptian Arabic",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["av"] = {
			["name"] = "Avar",
		},
		["be"] = {
			["article"] = "Belarusian language",
			-- Remove the combining acute accent (the key is the character held
			-- in the local `acute`, not the word "acute").
			["replacements"] = { [acute] = "" },
		},
		["bn"] = {
			["name"] = "Bengali",
			["article"] = "Bengali language",
		},
		["bua"] = {
			["name"] = "Buryat",
		},
		["cel-pro"] = { -- Incorrect tag
			["name"] = "Proto-Celtic",
			["Wikipedia_code"] = "cel-x-proto",
		},
		["cel-x-proto"] = {
			["name"] = "Proto-Celtic",
		},
		["cel-bry-pro"] = { -- Incorrect tag
			["name"] = "Proto-Brythonic",
			["article"] = "Common Brittonic",
			["type"] = "reconstructed",
		},
		["com"] = {
			["name"] = "Comanche",
			["article"] = "Comanche language",
		},
		["cu"] = {
			["name"] = "Old Church Slavonic",
			["article"] = "Old Church Slavonic",
		},
		["de"] = {
			["name"] = "German",
			["article"] = "German language",
		},
		["en"] = {
			["name"] = "English",
			["article"] = "English language",
		},
		["es"] = {
			["name"] = "Spanish",
			["article"] = "Spanish language",
		},
		["egy"] = {
			["name"] = "Egyptian",
		},
		["evn"] = {
			["name"] = "Evenki",
			["article"] = "Evenki language",
		},
		["fr"] = {
			["name"] = "French",
			["article"] = "French language",
		},
		["frm"] = {
			["name"] = "Middle French",
			["article"] = "Middle French",
		},
		["frp"] = {
			["name"] = "Franco-Provençal",
		},
		["ff"] = {
			["name"] = "Fula",
		},
		["gem-pro"] = { -- Incorrect tag
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "gem-x-proto",
		},
		["gem-x-proto"] = {
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["gml"] = {
			["name"] = "Middle Low German",
		},
		["gmw-ecg"] = {
			["name"] = "East Central German",
		},
		["gmw-x-proto"] = {
			["name"] = "Proto-West Germanic",
			["article"] = "Proto-West Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["gmq-x-gut"] = {
			["name"] = "Gutnish",
			["article"] = "Gutnish",
		},
		["goh"] = {
			-- Remove macrons, circumflexes, and diaereses
			["replacements"] = {
				decompose = true,
				from = {
					"[" .. macron .. circumflex .. diaeresis .. "]",
				},
			},
		},
		["got"] = {
			["name"] = "Gothic",
			["article"] = "Gothic language",
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"] = "𐌱",
				["[Gg]"] = "𐌲",
				["[Dd]"] = "𐌳",
				["[EeĒē]"] = "𐌴",
				["[Qq]"] = "𐌵",
				["[Zz]"] = "𐌶",
				["[Hh]"] = "𐌷",
				["[Þþ]"] = "𐌸",
				["[IiÍí]"] = "𐌹",
				["[Kk]"] = "𐌺",
				["[Ll]"] = "𐌻",
				["[Mm]"] = "𐌼",
				["[Nn]"] = "𐌽",
				["[Jj]"] = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"] = "𐍀",
				["[Rr]"] = "𐍂",
				["[Ss]"] = "𐍃",
				["[Tt]"] = "𐍄",
				["[WwYy]"] = "𐍅",
				["[Ff]"] = "𐍆",
				["[Xx]"] = "𐍇",
				["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"] = "𐍉",
			},
		},
		["gsw"] = {
			["name"] = "Alemannic German",
		},
		["grc"] = {
			["name"] = "Ancient Greek",
			["article"] = "Ancient Greek",
			["replacements"] = {
				decompose = true,
				from = {
					-- Replace variant letterforms with standard ones.
					"ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ",
					-- Remove macrons, breves, and underties.
					"[" .. macron .. breve .. undertie .. "]",
				},
				to = {
					"β", "ε", "θ", "κ", "ρ", "σ", "φ",
				},
			},
		},
		["grk-pro"] = { -- Incorrect tag
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "grk-x-proto",
		},
		["grk-x-proto"] = {
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["grt"] = {
			["name"] = "Garo",
		},
		["ha"] = {
			["name"] = "Hausa",
			-- remove tilde, grave, acute, macron, circumflex
			["replacements"] = {
				decompose = true,
				from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" },
			},
		},
		["hi"] = {
			["name"] = "Hindi",
			["article"] = "Hindi",
		},
		["ine-bsl-pro"] = {
			["name"] = "Proto-Balto-Slavic",
			["article"] = "Proto-Balto-Slavic language",
			["type"] = "reconstructed",
		},
		["ine-pro"] = { -- Incorrect tag
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "ine-x-proto",
		},
		["ine-x-proto"] = {
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["ja"] = {
			["name"] = "Japanese",
			["article"] = "Japanese language",
		},
		["jbo"] = { -- Lojban
			["type"] = "appendix",
		},
		["ket"] = {
			["name"] = "Ket",
			["article"] = "Ket language",
		},
		["ksk"] = {
			["name"] = "Kansa",
			["article"] = "Kansa language",
		},
		["la"] = {
			["name"] = "Latin",
			["article"] = "Latin",
			-- Remove macrons, breves, and diaereses
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. breve .. diaeresis .. "]" },
			},
		},
		["lt"] = {
			["name"] = "Lithuanian",
			-- remove acute, tilde, grave
			["replacements"] = {
				decompose = true,
				from = { "[" .. acute .. tilde .. grave .. "]" },
			},
		},
		["moe"] = {
			["name"] = "Cree",
		},
		["mul"] = {
			["name"] = "Translingual",
			["article"] = "",
		},
		["nci"] = {
			["name"] = "Classical Nahuatl",
			["article"] = "Classical Nahuatl",
			["replacements"] = {
				decompose = true,
				-- Remove macrons, acutes, circumflexes, graves, and saltillo;
				-- see [[Saltillo (linguistics)]].
				from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" },
			},
		},
		["nds-de"] = {
			["name"] = "German Low German",
		},
		["non"] = {
			["name"] = "Old Norse",
		},
		["non-x-proto"] = {
			["name"] = "Proto-Norse",
		},
		["odt"] = {
			["name"] = "Old Dutch",
		},
		["oge"] = {
			["name"] = "Old Georgian",
		},
		["oj"] = {
			["name"] = "Ojibwe",
		},
		["orv"] = {
			["name"] = "Old East Slavic",
			["article"] = "Old East Slavic",
			["replacements"] = {
				-- Remove U+0484 COMBINING CYRILLIC PALATALIZATION.
				[U(0x484)] = "",
			},
		},
		["osx"] = {
			["name"] = "Old Saxon",
		},
		["pt"] = {
			["name"] = "Portuguese",
			["article"] = "Portuguese language",
			-- ["scripts"] = { "Latn" },
		},
		["pa"] = {
			["name"] = "Punjabi",
			["article"] = "Punjabi language",
		},
		["pgl"] = {
			["name"] = "Primitive Irish",
			["article"] = "Primitive Irish",
		},
		["pis"] = {
			["name"] = "Pijin",
			["article"] = "Pijin language",
		},
		["poz-x-poly-proto"] = {
			["name"] = "Proto-Nuclear Polynesian",
			["article"] = "Proto-Polynesian language",
			["type"] = "reconstructed",
		},
		["rap"] = {
			["name"] = "Rapa Nui",
			["article"] = "Rapa Nui language",
		},
		["ru"] = {
			["name"] = "Russian",
			["article"] = "Russian language",
			-- Remove the combining acute accent (the key is the character held
			-- in the local `acute`, not the word "acute").
			["replacements"] = { [acute] = "" },
		},
		["rw"] = {
			["name"] = "Rwanda-Rundi",
		},
		["se"] = {
			["replacements"] = {
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sem-pro"] = {
			["name"] = "Proto-Semitic",
			["article"] = "Proto-Semitic",
			["type"] = "reconstructed",
		},
		["sh"] = {
			["article"] = "Serbo-Croatian language",
			["replacements"] = {
				decompose = true,
				-- Strip tone/length marks that follow a vowel or syllabic r,
				-- keeping the base letter (capture %1).
				from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
					.. grave .. invbreve .. acute .. macron .. tilde .. "]" },
				to = { "%1" },
			},
		},
		["sl"] = {
			["name"] = "Slovene",
			["replacements"] = {
				decompose = true,
				-- remove tonal orthography
				from = { "ł", "[" .. grave .. acute .. macron .. double_grave .. invbreve .. circumflex .. dot_below .. "]" },
				to = { "l" },
			},
		},
		["sla-pro"] = {
			["name"] = "Proto-Slavic", -- also Common Slavic
			["type"] = "reconstructed",
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
				["ĭ"] = "ь",
				["ŭ"] = "ъ",
			},
		},
		["tts"] = {
			["name"] = "Isan", -- also "Northeastern Thai"
			["article"] = "Isan language",
		},
		["ug"] = {
			["name"] = "Uyghur", -- also less commonly "Uighur"
			["article"] = "Uyghur language",
		},
		["uk"] = {
			["article"] = "Ukrainian language",
			-- Remove the combining acute accent (the key is the character held
			-- in the local `acute`, not the word "acute").
			["replacements"] = { [acute] = "" },
		},
		["ur"] = {
			["name"] = "Urdu",
			["article"] = "Urdu",
		},
		["xcl"] = {
			["name"] = "Old Armenian",
			["article"] = "Classical Armenian",
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xgf"] = {
			["name"] = "Tongva", -- not ISO name "Gabrielino-Fernandeño"
			["article"] = "Tongva language",
			["replacements"] = {
				["['`ʔ]"] = "ʼ",
			},
		},
		["xlu"] = {
			["name"] = "Luwian", -- not ISO name "Cuneiform Luwian"
			["article"] = "Cuneiform Luwian",
		},
		["xpq"] = {
			["name"] = "Mohegan-Pequot",
		},
		["xxt"] = {
			["name"] = "Tambora",
			["article"] = "Tambora language",
		},
		["xvn"] = {
			["name"] = "Vandalic",
			["article"] = "Vandalic language",
		},
		["yua"] = {
			["name"] = "Yucatec Maya",
			["article"] = "Yucatec Maya language",
		},
		["zh"] = {
			["name"] = "Chinese",
			["article"] = "Chinese language",
			-- ["scripts"] = { "Hani" },
		},
	},

	-- Here, keys (for example, "gem") are Wikipedia language codes used in
	-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
	-- code.
	-- Subtags are not currently supported.
	["redirects"] = {
		["aae"] = "sq",
		["aiq"] = "fa",
		["aln"] = "sq",
		["als"] = "sq",
		["azb"] = "az",
		["azj"] = "az",
		["bgn"] = "bal",
		["bs"] = "sh",
		["bxr"] = "bua",
		["ciw"] = "oj",
		["cnr"] = "sh",
		["fil"] = "tl",
		["fuf"] = "ff",
		["gem"] = "gem-pro", -- Not correct, but is commonly used.
		["hak"] = "zh",
		["hbo"] = "he",
		["hr"] = "sh",
		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
		["kjv"] = "sh",
		["nan"] = "zh",
		["prs"] = "fa",
		["rn"] = "rw",
		["sli"] = "gmw-ecg",
		["sr"] = "sh",
		["src"] = "sc",
		["sro"] = "sc",
		["tw"] = "ak",
		["wae"] = "gsw",
		["wep"] = "nds-de",
		["yue"] = "zh",
		["xno"] = "fro",
	},
}

return data