Module:User:Catonif/szl-IPA

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Catonif, for their own experimentation. Items in this module may be added and removed at Catonif's discretion; do not rely on this module's stability.


-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}

local m_IPA = require("Module:IPA")
-- Language object for the code "szl"; handed to m_IPA.format_IPA_full by export.IPA.
local lang = require("Module:languages").getByCode("szl")

-- Vowel symbols as they appear in the working string of `phonemic`. The string
-- is spliced into character classes by the <i>-palatalisation rule and by the
-- hyphenation rule.
local V = "aãõɛeɔoɨui";

-- Digraphs ending in <z>. They are substituted before the single-letter table
-- below is applied. `_` and `ꙮ` are temporary placeholders: `phonemic` later
-- turns `_` into the affricate tie bar and resolves `ꙮ` (<rz>) to ʂ or ʐ.
local di = {
	["cz"]="t_ʂ", ["rz"]="ꙮ", ["sz"]="ʂ", ["dz"]="d_z",
}

-- Single letters mapped to phoneme symbols. `phonemic` applies this table to
-- every character of the word; a character without an entry here is left as
-- it is. `W` is a placeholder that only becomes "w" when the result is
-- lowercased at the very end of `phonemic`.
local phon = {
	["a"]="a",		["ã"]="ã",		["b"]="b",
	["c"]="t_s",	["ć"]="t_ɕ",	["d"]="d",	["e"]="ɛ",	["é"]="e",
	["f"]="f",		["g"]="ɡ",		["h"]="x",
	["i"]="i",		["j"]="j",		["k"]="k",	["l"]="l",
	["ł"]="w",		["m"]="m",		["n"]="n",	["ń"]="ɲ",
	["o"]="ɔ",		["ŏ"]="ɔW",		["ō"]="o",	["ô"]="wɔ", -- õ is dealt with later on
	["p"]="p",		["r"]="r",		["s"]="s",	["ś"]="ɕ",	["t"]="t",
	["u"]="u",		["w"]="v",		["y"]="ɨ",
	["z"]="z",		["ż"]="ʐ",		["ź"]="ʑ",
}

--- Converts one orthographic word into its phonemic transcription.
--
-- The word is processed as a single working string that goes through an
-- ordered series of substitutions: digraphs and letters, palatalisation,
-- voicing assimilation, affricate joining, hyphenation and stress.
-- While the substitutions run, a few placeholders stand in for IPA symbols
-- and are only resolved at the end:
--   `_`  affricate tie bar
--   `I`  <i> before a vowel (becomes "j")
--   `J`  palatalisation mark (becomes "ʲ")
--   `W`  second element of <ŏ> and <au> (becomes "w" when lowercased)
--   `ꙮ`  <rz>, until it is known whether it follows a voiceless consonant
--   `!`  a syllable break typed by the caller as `.`
--
-- A leading `*` suppresses the stress mark altogether. A leading `^` puts the
-- stress on the third syllable from the end instead of the second from the
-- end. In both cases the prefix character itself is removed.
--
-- @param text string  a single word (no spaces), optionally prefixed by `*` or `^`
-- @return string  the transcription, without surrounding slashes
local function phonemic(text)

	local ante = false;
	local unstressed = false;

	-- `*` is escaped so that it is unambiguously a literal character.
	if (text:find('^%*')) then
		unstressed = true;
		text = text:sub(2);
	elseif (text:find('^%^')) then
		ante = true;
		text = text:sub(2);
	end

	-- In-place substitution on the working string. This must be `local`:
	-- a plain `function rsub` would define a global that captures this
	-- call's `text` and is overwritten on every call.
	local function rsub(s, r) text = mw.ustring.gsub(text, s, r); end

	text = mw.ustring.lower(text);

	rsub('ch', 'x'); rsub('[crsd]z', di); rsub('dż', 'd_ʐ'); -- handle digraphs
	rsub(".", phon) -- basic orthographical rules
	rsub('au', 'aW');
	rsub("n([kɡx])", "ŋ%1"); -- (is this really phonemic?)

	-- palatalisation
	-- palatalisation by <-i->: the <i> is dropped after n/s/z, which turn
	-- into their palatal counterparts; after any other listed consonant it
	-- is kept as the placeholder `I`.
	local C_palat_by_I = 'bdfɡxklwmnprstvzʐʂ'
	local palat_by_I = {
		['n'] = 'ɲ',
		['s'] = 'ɕ', ['z'] = 'ʑ',
	};
	rsub("([" .. C_palat_by_I .. "])i([" .. V .. "])", function (c, v)
		return (palat_by_I[c] or c .. 'I') .. v;
	end);
	-- palatalisation by front vowels: same idea, but the vowel stays and
	-- other consonants receive the placeholder `J`.
	-- NOTE(review): the 'ʃ' and 'ʒ' entries can never be looked up, because
	-- neither character is in C_palat_by_F; they are kept as found.
	local C_palat_by_F = 'bdfɡxklwmnprstvzʐʂ';
	local F = 'eéiy';
	local palat_by_F = {
		['n'] = 'ɲ',
		['s'] = 'ɕ', ['ʃ'] = 'ɕ',
		['z'] = 'ʑ', ['ʒ'] = 'ʑ',
	};
	rsub('(['..C_palat_by_F..'])([' .. F .. '])', function (c, v)
		return (palat_by_F[c] or c .. 'J') .. v;
	end);

	-- voicing and devoicing
	local T = 'ptsʂɕkx';
	local D = 'bdzʐʑɡ';

	rsub('(['..T..'])v', '%1f');
	-- <rz> is voiceless after a voiceless consonant and voiced elsewhere
	rsub('(['..T..'])ꙮ', '%1ʂ'); rsub('ꙮ', 'ʐ');

	-- Concatenates the keys of a table, for use inside a character class
	-- (so the unspecified order of `pairs` does not matter).
	local function arr_list(x) local r = ''; for i in pairs(x) do r = r..i; end return r; end
	local devoice = {
		['b'] = 'p', ['d'] = 't', ['ɡ'] = 'k',
		['z'] = 's', ['v'] = 'f', ['ʒ'] = 'ʃ',
		['ʑ'] = 'ɕ', ['ʐ'] = 'ʂ',
	};
	local voice = {}; for i, v in pairs(devoice) do voice[v] = i; end

	local arr_list_devoice = arr_list(devoice);
	local arr_list_voice = arr_list(voice);

	-- word-final devoicing
	rsub('['..arr_list_devoice..']$', devoice);

	-- Each pass only changes a consonant that directly precedes one from
	-- T or D, so the passes are repeated to let the change travel leftwards
	-- through a longer cluster.
	for _ = 0, 5 do
		rsub('(['..arr_list_devoice..'])(['..T..'])', function (a, b) return devoice[a] .. b; end)
		rsub('(['..arr_list_voice..'])(['..D..'])', function (a, b) return voice[a] .. b; end)
	end

	rsub("t([sɕ])", "t_%1"); rsub("d([zʑ])", "d_%1"); -- affricates

	-- hyphenation
	-- Breaks typed by the caller are parked as `!` so that the automatic
	-- rule below neither crosses nor duplicates them.
	rsub('%.', '!');
	-- Two passes: a match consumes the vowel on its right, so that vowel
	-- cannot start the next match within the same pass.
	for _ = 0, 1 do
		rsub('(['..V..'W])([^'..V..'W!.]*)(['..V..'])', function (a, b, c)
			local function find(x) return mw.ustring.find(b, x); end
			if ((mw.ustring.len(b) < 2) or find('^([td]_.)$')) then
				-- nothing, one consonant, or a lone affricate: all of it
				-- goes with the following vowel
				b = '.'..b;
			else
				-- position of the second element of the cluster (an
				-- initial affricate occupies three characters)
				local i = 2;
				if (find('^([td]_.)')) then i = 4; end
				if (mw.ustring.sub(b, i, i):find('^[rlwIJ]$')) then
					b = '.'..b;
				else
					b = mw.ustring.sub(b, 1, i - 1)..'.'..mw.ustring.sub(b, i);
				end
			end
			return a..b..c;
		end);
	end
	rsub('!', '.')

	-- stress
	if (not unstressed) then
		-- replace the break in front of the stressed syllable by the mark
		if (ante) then
			rsub('%.([^.]+%.[^.]+%.[^.]+)$', 'ˈ%1');
		else
			rsub('%.([^.]+%.[^.]+)$', 'ˈ%1');
		end
		-- too few syllables for the rule above: stress the first one
		if (not mw.ustring.find(text, 'ˈ')) then
			text = 'ˈ' .. text;
		end
	end

	-- this should best happen at the end because <ɔ̃> is two characters long and
	-- would mess up with bracket catching. in practice it would work as well
	-- without this but it looks bodge-y.
	rsub('õ', 'ɔ̃');

	-- resolve the remaining placeholders
	rsub('_', '͡'); rsub('I', 'j'); rsub('J', 'ʲ');

	-- turns the `W` placeholder into "w"
	text = mw.ustring.lower(text);

	return text

end

--- Transcribes a term that may consist of several space-separated words.
-- Each word is transcribed on its own by `phonemic` and the results are
-- joined with single spaces. A term without any space is passed to
-- `phonemic` unchanged.
-- @param term string
-- @return string
local function multiword(term)
	if not term:find(' ') then
		return phonemic(term)
	end

	local transcribed = {}
	for word in term:gmatch('[^ ]+') do
		transcribed[#transcribed + 1] = phonemic(word)
	end
	return table.concat(transcribed, ' ')
end

--- Entry point for testcases: transcribes `text` (one or more words) and
-- returns the bare transcription string, without slashes or formatting.
export.testcase = function (text)
	local transcription = multiword(text)
	return transcription
end

--- Template entry point.
-- Reads the positional arguments of the parent frame as the terms to
-- transcribe; when there are none, the text of the current page title is
-- used instead. Every term is transcribed, wrapped in slashes and the whole
-- list is handed to m_IPA.format_IPA_full together with the language object.
-- @param frame  the frame object of the #invoke call
-- @return whatever m_IPA.format_IPA_full returns
function export.IPA(frame)
	local parent_args = frame:getParent().args

	local words = {}
	for _, word in ipairs(parent_args) do
		words[#words + 1] = word
	end

	-- no positional argument given: fall back to the page name
	if next(words) == nil then
		words = { mw.title.getCurrentTitle().text }
	end

	local prons = {}
	for index = 1, #words do
		prons[index] = { pron = "/" .. multiword(words[index]) .. "/" }
	end

	return m_IPA.format_IPA_full(lang, prons)
end

return export