Module:User:Victar/reconstruct

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Victar, for their own experimentation. Items in this module may be added and removed at Victar's discretion; do not rely on this module's stability.


local export = {}

-- Builds a string from Unicode code points. Lua 5.1 has no built-in for
-- this, so the Scribunto ustring helper is used.
local U = mw.ustring.char

-- Combining diacritics, appended to a base letter where no precomposed
-- character is used.
local circumflex = U(0x302) -- combining circumflex accent
local macron = U(0x304) -- combining macron

local ecircumflex = U(0x0EA) -- latin small letter e with circumflex (ê)
local ocircumflex = U(0x0F4) -- latin small letter o with circumflex (ô)

local amacron = U(0x101) -- latin small letter a with macron (ā)
local emacron = U(0x113) -- latin small letter e with macron (ē)
local imacron = U(0x12B) -- latin small letter i with macron (ī)
local omacron = U(0x14D) -- latin small letter o with macron (ō)
local umacron = U(0x16B) -- latin small letter u with macron (ū)

local aogonek = U(0x105) -- latin small letter a with ogonek (ą)
local iogonek = U(0x12F) -- latin small letter i with ogonek (į)
local oogonek = U(0x1EB) -- latin small letter o with ogonek (ǫ)
local uogonek = U(0x173) -- latin small letter u with ogonek (ų)

local aogonekmacron = aogonek .. macron -- latin small letter a with ogonek and macron
local iogonekmacron = iogonek .. macron -- latin small letter i with ogonek and macron
local oogonekmacron = oogonek .. macron -- latin small letter o with ogonek and macron
local uogonekmacron = uogonek .. macron -- latin small letter u with ogonek and macron

local oogonekcircumflex = oogonek .. circumflex -- latin small letter o with ogonek and circumflex

-- Sound inventories as lists of strings. Some entries are digraphs ("gw",
-- "kw", "ng", "hw"), so an entry is not always a single character.
local consonant = {"b", "c", "d", "f", "gw", "g", "k", "kw", "l", "m", "ng", "n", "p", "r", "ʀ", "s", "t", "θ", "v", "w", "z", "j", "hw", "h"}
local consonant_cluster = {"br", "bl", "dr", "fl", "fr", "gl", "gr", "kl", "kr", "pl", "pr", "sl", "sr", "tr"}
local vowel = {"a", amacron, aogonek, aogonekmacron, "e", emacron, "i", imacron, iogonek, iogonekmacron, "o", omacron, oogonekmacron, oogonekcircumflex, "u", umacron, uogonek, uogonekmacron}
local diphthong = {"ai", "au", "eu", "ui"}

--- Marks syllable boundaries in a term.
-- The boundary rules have not been ported from the PHP prototype yet; they
-- are kept below as commented-out reference. Until they are, the term is
-- returned unchanged.
-- @param term the term to process
-- @return the term, currently unmodified
function syllablize(term)

	-- The prototype merged `consonant_cluster` into `consonant` and
	-- `diphthong` into `vowel` here with `..`. Those are tables, so `..`
	-- raises an error, and reassigning the shared lists would alter them for
	-- every later call. Build combined lists locally once the rules below
	-- are ported.

	--term = preg_replace('/('.vowel.')('.consonant.')('.vowel.')/','$1.$2$3',term) -- before a single consonant

	--term = preg_replace('/('.vowel.')('.consonant.')('.consonant.')('.vowel.')/','$1$2.$3$4',term) -- between two consonants
	--term = preg_replace('/('.vowel.')('.consonant.')('.consonant.')('.consonant.')('.vowel.')/','$1$2$3.$4$5',term) -- between two consonants

	--term = preg_replace('/('.vowel.')('.consonant.')('.vowel.')\z/','$1.$2$3',term)
	--term = preg_replace('/(^[\.])('.consonant.')('.vowel.')('.consonant.')\z/','$1.$2$3$4',term)
	--term = preg_replace('/^('.consonant.')('.vowel.')('.consonant.')(^[\.])/','$1.$2$3$4',term)

	--term = preg_replace('/('.vowel.')('.consonant.')('.consonant.')('.vowel.')\z/','$1$2.$3$4',term)

	--term = preg_replace('/(\.)('.consonant.')('.vowel.')('.consonant.')('.vowel.')(\.)/','$1$2$3.$4$5$6',term)

	return term

end

--- Doubles consonants before *j / *ij (gemination).
-- The rule itself has not been ported from the PHP prototype yet; it is
-- kept below as commented-out reference. Until it is, the term is returned
-- unchanged.
-- @param term the term to process
-- @return the term, currently unmodified
function germinate(term)
	-- The prototype derived a "consonants except r" alternation with PHP's
	-- str_replace, which does not exist in Lua; calling it raised an error
	-- and its result went into an accidental global. Filter the consonant
	-- list locally when the rule below is ported.
	--term = preg_replace('/('.vowel.')('.consonant_except_r.')(\.?)(ij|j)/','$1$2$2$3$4',term)
	return term
end

--- Replaces `pattern` wherever it is immediately followed by the frontier
-- `%f[%s%z]`, i.e. a transition into a whitespace or NUL character.
-- @param pattern ustring pattern to look for
-- @param replacement replacement passed through to gsub
-- @param term the string to operate on
-- @return both results of mw.ustring.gsub: the new string and the match count
function replace_final(pattern, replacement, term)
	local anchored_pattern = pattern .. "%f[%s%z]"
	return mw.ustring.gsub(term, anchored_pattern, replacement)
end


--- Applies a vowel mutation triggered by a vowel in the following syllable.
-- The rule itself has not been ported from the PHP prototype yet; it is
-- kept below as commented-out reference. Until it is, the term is returned
-- unchanged and the other arguments are ignored.
-- @param pattern the vowel to change (unused for now)
-- @param replacement what it changes to (unused for now)
-- @param before the triggering vowels, "|"-separated in the prototype (unused for now)
-- @param term the term to process
-- @return the term, currently unmodified
function umlaut(pattern, replacement, before, term)
	-- The prototype flattened the vowel list with PHP's str_replace, which
	-- does not exist in Lua; calling it raised an error and its result went
	-- into an accidental global. Build the character set locally when the
	-- rules below are ported.
	--term = preg_replace('/([^'.vowel_array.'])('.pattern.')(\.?)('.consonant.')(\.?)('.before.')/','$1'.replacement.'$3$4$5$6',term);
	--term = preg_replace('/([^'.vowel_array.'])('.pattern.')(\.?)('.consonant.')(\.?)('.consonant.')(\.?)('.before.')/','$1'.replacement.'$3$4$5$6$7$8',term);
	return term
end

--- Applies the module's chain of sound changes to a term.
-- Rule order matters: each step operates on the output of the previous one.
-- @param term the term to transform
-- @param pos currently unused
-- @param gender currently unused
-- @return the transformed term
function export.reconstruct(term, pos, gender)

	-- `consonant` is a list of strings and Lua patterns have no alternation,
	-- so the word-final rules below match one consonant letter at a time
	-- using a character class built from every letter in that list. The
	-- digraph entries only contribute their individual letters.
	local consonant_class = "[" .. table.concat(consonant) .. "]"

	term = syllablize(term)

	term = umlaut("u","o","a|o",term) -- a-umlaut: */u/ and occasionally */i/ lower to */o/ */e/ before a consonant /o/ /ɑ/ ("nonhigh umlaut")
	term = umlaut("i","e","a|o",term) --

	--term = preg_replace('/('.vowel.')('.vowel.')('.vowel.')(ˌ)/','.',term)
	--term = preg_replace('/('.vowel.')('.vowel.')(ˌ)/','.',term)
	--term = preg_replace('/('.vowel.')(ˌ)/','.',term)

	-- replace_final and gsub return (string, count); plain assignment keeps
	-- only the string.
	term = replace_final("z", "", term) -- *z > ∅ /_# loss of word-final *z
	term = replace_final("a", "", term) -- *a > ∅ /_#
	term = replace_final(aogonek, "", term) -- *ą > ∅ /_#

	-- Cʷ > C : delabialization of all labiovelar consonants except
	-- word-initially. Requiring a preceding non-space character keeps the
	-- match away from the start of the string and of any later word.
	term = mw.ustring.gsub(term, "([^%s])gw", "%1g")
	term = mw.ustring.gsub(term, "([^%s])kw", "%1k")
	term = mw.ustring.gsub(term, "([^%s])hw", "%1h")

	term = mw.ustring.gsub(term, emacron, amacron) -- ē > ā : lowering and retraction of *ē to *ā
	term = mw.ustring.gsub(term, "z", "ʀ") -- rhotacism of *z to *ʀ

	term = germinate(term) -- gemination of all consonants except *r before *j and ij

	--term = preg_replace('/(\.)('.str_replace("|j","",consonant).')\z/','$2',term)

	-- Final *j and *w after a consonant become the vowels i and u.
	term = mw.ustring.gsub(term, "(" .. consonant_class .. ")j$", "%1i")
	term = mw.ustring.gsub(term, "(" .. consonant_class .. ")w$", "%1u")

	-- loss of word-final *i and *u in long-stem terms (here: after two consonants)
	term = mw.ustring.gsub(term, "(" .. consonant_class .. consonant_class .. ")[ui]$", "%1")

	return term
end