Module:sa-Brah-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module transliterates Sanskrit text written in the Brahmi script, per WT:SA TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:sa-Brah-translit/testcases.

Functions

[edit]
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

-- Brahmi consonant letters mapped to their bare IAST consonant.
-- The vowel is not included here: export.tr appends the inherent "a" or the
-- value of a following dependent vowel sign.
-- Keys must be stored as real UTF-8 Brahmi characters (code points noted per
-- row) so that they can match the input text.
local consonants = {
	-- U+11013..U+11017 (velars)
	['𑀓']='k', ['𑀔']='kh', ['𑀕']='g', ['𑀖']='gh', ['𑀗']='ṅ',
	-- U+11018..U+1101C (palatals)
	['𑀘']='c', ['𑀙']='ch', ['𑀚']='j', ['𑀛']='jh', ['𑀜']='ñ',
	-- U+1101D..U+11021 (retroflexes)
	['𑀝']='ṭ', ['𑀞']='ṭh', ['𑀟']='ḍ', ['𑀠']='ḍh', ['𑀡']='ṇ',
	-- U+11022..U+11026 (dentals)
	['𑀢']='t', ['𑀣']='th', ['𑀤']='d', ['𑀥']='dh', ['𑀦']='n',
	-- U+11027..U+1102B (labials)
	['𑀧']='p', ['𑀨']='ph', ['𑀩']='b', ['𑀪']='bh', ['𑀫']='m',
	-- U+1102C..U+1102F (semivowels) and U+11034 (LLA)
	['𑀬']='y', ['𑀭']='r', ['𑀮']='l', ['𑀯']='v', ['𑀴']='ḷ',
	-- U+11030..U+11033 (sibilants and h)
	['𑀰']='ś', ['𑀱']='ṣ', ['𑀲']='s', ['𑀳']='h',
}

-- Brahmi dependent vowel signs (U+11038, U+1103A..U+11045) mapped to the IAST
-- vowel that replaces a consonant's inherent "a", plus the virama (U+11046),
-- which maps to the empty string so the consonant is left without a vowel.
-- Keys must be stored as real UTF-8 Brahmi characters so they match the input.
local diacritics = {
	['𑀸']='ā', ['𑀺']='i', ['𑀻']='ī', ['𑀼']='u', ['𑀽']='ū', ['𑀾']='ṛ', ['𑀿']='ṝ',
	['𑁀']='ḷ', ['𑁁']='ḹ', ['𑁂']='e', ['𑁃']='ai', ['𑁄']='o', ['𑁅']='au', ['𑁆']='',
}

-- Single-character replacements applied by export.tr after consonant
-- syllables have been handled: independent vowels, signs, digits and
-- punctuation. Keys must be stored as real UTF-8 characters; with damaged
-- encoding several distinct letters collapse onto the same key and silently
-- overwrite each other.
local tt = {
	-- vowels (U+11005..U+11012)
	['𑀅']='a', ['𑀆']='ā', ['𑀇']='i', ['𑀈']='ī', ['𑀉']='u', ['𑀊']='ū', ['𑀋']='ṛ', ['𑀌']='ṝ',
	['𑀍']='ḷ', ['𑀎']='ḹ', ['𑀏']='e', ['𑀐']='ai', ['𑀑']='o', ['𑀒']='au',
	-- chandrabindu (U+11000)
	['𑀀']='m̐', --until a better method is found
	-- anusvara (U+11001)
	['𑀁']='ṃ', --until a better method is found
	-- visarga (U+11002)
	['𑀂']='ḥ',
	-- avagraha (Devanagari U+093D)
	['ऽ']='’',
	-- numerals (U+11066..U+1106F)
	['𑁦']='0', ['𑁧']='1', ['𑁨']='2', ['𑁩']='3', ['𑁪']='4', ['𑁫']='5', ['𑁬']='6', ['𑁭']='7', ['𑁮']='8', ['𑁯']='9',
	-- punctuation
	['𑁈']='.', --double danda (U+11048)
	['𑁇']='.', --danda (U+11047)
	-- Vedic extensions: jihvamuliya (U+11003), upadhmaniya (U+11004)
	['𑀃']='x', ['𑀄']='f',
	-- Om. NOTE: this key is two characters long, while export.tr looks the
	-- table up one character at a time, so this entry is never consulted;
	-- the separate 'o' and anusvara entries above yield the same "oṃ".
	['𑀑𑀁']='oṃ',
	-- reconstructed
	['*'] = '',
}

-- Concatenates the keys of `map` for use inside a pattern character class
-- ("[...]"). Keys are sorted so the generated pattern is deterministic.
local function char_class(map)
	local keys = {}
	for key in pairs(map) do
		keys[#keys + 1] = key
	end
	table.sort(keys)
	return table.concat(keys)
end

-- One consonant letter, optionally followed by one dependent vowel sign or
-- virama. The classes are derived from the lookup tables instead of being
-- spelled out a second time: the previous hand-written consonant class had
-- drifted from the `consonants` table (it lacked the LLA letter), so that
-- letter was never transliterated.
local syllable_pattern =
	'([' .. char_class(consonants) .. '])' ..
	'([' .. char_class(diacritics) .. ']?)'

-- Transliterates `text` into Latin script.
-- @param text  string to transliterate
-- @param lang  language code (accepted for interface compatibility; unused here)
-- @param sc    script code (accepted for interface compatibility; unused here)
-- @return the transliterated string; characters with no table entry are
--         left unchanged
function export.tr(text, lang, sc)
	-- Pass 1: consonant syllables. A bare consonant takes the inherent "a";
	-- otherwise the sign's value (empty string for virama) is appended.
	text = mw.ustring.gsub(text, syllable_pattern, function(c, d)
		if d == '' then
			return consonants[c] .. 'a'
		end
		return consonants[c] .. diacritics[d]
	end)

	-- Pass 2: every remaining character is looked up individually in `tt`;
	-- characters without an entry are kept as they are.
	text = mw.ustring.gsub(text, '.', tt)

	return text
end
 
return export