Module:R:Perseus

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module implements {{R:Autenrieth}}, {{R:Elementary Lewis}}, {{R:Harpocration}}, {{R:L&S}}, {{R:LSJ}}, {{R:Middle Liddell}}, {{R:Peck}}, {{R:PersEnc}}, {{R:Platner}}, {{R:Slater}}, {{R:Smith's Antiquities}}, {{R:Smith's Geography}}, {{R:Smith's Persons}}, {{R:Stillwell}}, and {{R:Zoega}}.

1 June 2016: An LSJ collision set was added to allow several thousand entries such as πεῖ (peî) and πέρα (péra) to be redirected to the appropriate Perseus lemma disambiguation pages without template arguments. The code was slightly refactored to mitigate the added complexity.


local export = {}

-- Collision data is stored in submodules of [[Module:R:Perseus/collision-data]].
local m_params = require("Module:parameters")
local m_utils = require("Module:grc-utilities")
local m_scripts = require("Module:scripts")
local m_script_utils = require("Module:script utilities")
local m_languages = require("Module:languages")

-- Tagging function taken from Module:grc-utilities; this module applies it to
-- the entry title of Greek-language resources before building the link text.
local tag_greek = m_utils.tag

-- Tags `text` through Module:script utilities, using the language object for
-- code "la" and the script object for code "Latn".
local function tag_latin(text)
	return m_script_utils.tag_text(
		text,
		m_languages.getByCode("la"),
		m_scripts.getByCode("Latn"),
		nil
	)
end

-- Records the tracking key "R:Perseus/<code>" via Module:debug.
-- Always returns the empty string so that a call can stand in for the link
-- text in format_perseus_wikilink.
local function track(code)
	local m_debug = require('Module:debug')
	m_debug.track('R:Perseus/' .. code)
	return ''
end

-- Lower-cases `w` (byte-wise, via string.lower) and replaces every space with
-- a hyphen, e.g. "Ab Urbe" -> "ab-urbe".
-- Returns exactly one value: string.gsub also yields the number of
-- substitutions, which must not leak to callers that forward all results.
local function lower_dashed(w)
	local dashed = string.gsub(string.lower(w), " ", "-")
	return dashed
end

-- Decomposes `x` to Unicode NFD and deletes every run of characters matched by
-- the mw.ustring class %W (everything that is not a letter or digit).
-- Besides the decomposed combining marks, this also removes spaces and
-- punctuation.
-- Returns exactly one value: the substitution count that gsub also yields is
-- dropped so it cannot be forwarded as a stray extra argument or result.
local function remove_diacritics(x)
	local stripped = mw.ustring.gsub(mw.ustring.toNFD(x), '%W+', "")
	return stripped
end

-- maybe there is a better way to do this
-- Converts the word `w` to the code used in Perseus entry URLs by delegating
-- to the polytonic-to-perseus-betacode submodule (loaded on first use).
local function beta(w)
	local converter = require("Module:R:Perseus/polytonic-to-perseus-betacode")
	return converter.polytonic_to_perseus_betacode(w)
end

--[[ Resources:
	template name, with "R:" removed = {
		[1] = Perseus resource id,
		[2] = collisions index name,
		[3] = f-query-entry-postprocess,
		[4] = query-entry-suffix,
		[5] = language name
	}
	]]
-- This allows the optional selection of a different bio number for Smith's Persons.
-- `a` is the entry suffix stored in the "Smith's Persons" resource:
--   nil       when the calling template has no second argument
--             (consumers treat nil as "no suffix"),
--   ""        when the second argument already contains "-bio-",
--   "-bio-1"  otherwise.
-- Declared local so the module does not create a global named `a`.
local a
do
	local current_frame = mw.getCurrentFrame()
	-- There may be no parent frame, e.g. when the module is loaded outside a
	-- template call; in that case `a` simply stays nil.
	local parent_frame = current_frame and current_frame:getParent()
	local second_arg = parent_frame and parent_frame.args[2]
	if second_arg then
		-- Plain-text search. As a Lua pattern, the trailing "-" in '-bio-' is a
		-- lazy quantifier on "o", so the pattern would match any "-bi".
		if string.find(second_arg, '-bio-', 1, true) then
			a = ""
		else
			a = '-bio-1'
		end
	end
end
-- Per-template resource descriptions, keyed by template name without "R:".
-- Fields are stored under explicit numeric keys; an omitted key is nil:
--   [1] Perseus resource id
--   [2] collisions index name
--   [3] function applied to the query entry
--   [4] suffix appended to the query entry
--   [5] language name
local resources = {
	-- Latin dictionaries
	["L&S"] = { [1] = "1999.04.0059", [2] = "LS", [5] = "latin" },
	["Elementary Lewis"] = { [1] = "1999.04.0060", [2] = "EL", [5] = "latin" },

	-- Encyclopedic works whose entries are addressed by lower-cased, dashed names
	["Peck"] = { [1] = "1999.04.0062", [3] = lower_dashed, [4] = "-harpers", [5] = "latin" },
	["PersEnc"] = { [1] = "1999.04.0004", [3] = lower_dashed, [4] = "", [5] = "latin" },
	["Stillwell"] = { [1] = "1999.04.0006", [2] = "PECS", [3] = lower_dashed, [4] = "", [5] = "latin" },
	["Platner"] = { [1] = "1999.04.0054", [2] = "TDAR", [3] = lower_dashed, [4] = "", [5] = "latin" },
	["Smith's Antiquities"] = { [1] = "1999.04.0063", [3] = lower_dashed, [4] = "-cn", [5] = "latin" },
	["Smith's Persons"] = { [1] = "1999.04.0104", [3] = lower_dashed, [4] = a, [5] = "latin" },
	["Smith's Geography"] = { [1] = "1999.04.0064", [3] = lower_dashed, [4] = "-geo", [5] = "latin" },

	-- Greek dictionaries
	["LSJ"] = { [1] = "1999.04.0057", [2] = "LSJ", [5] = "greek" },
	["Middle Liddell"] = { [1] = "1999.04.0058", [2] = "ML", [5] = "greek" },
	["Harpocration"] = {
		[1] = "2013.01.0002",
		[3] = function(w)
			return lower_dashed(remove_diacritics(w))
		end,
		[4] = "",
		[5] = "greek",
	},
	["Autenrieth"] = { [1] = "1999.04.0073", [2] = "Autenrieth", [5] = "greek" },
	["Slater"] = { [1] = "1999.04.0072", [2] = "Slater", [5] = "greek" },

	-- Language field is the code 'non' rather than 'latin'/'greek'
	["Zoega"] = { [1] = "2003.02.0002", [2] = "Zoega", [5] = "non" },
}

-- Returns the language field (index 5) of the resource entry for `template`.
-- Errors if `template` has no entry in `resources`.
local function get_language(template)
	local entry = resources[template]
	return entry[5]
end

--[[ Reports whether entry `x` is marked as a collision for `template`.

	x        -- entry name to look up
	template -- key into `resources`

	Returns true only when the resource names a collisions index (field 2) and
	the table loaded from "Module:R:Perseus/collision-data/<index>" maps the
	lookup key to exactly `true`; returns false otherwise.

	The lookup key is `x` passed through the resource's postprocess function
	(field 3) when one exists. Previously the function value itself was used as
	the key, so resources with both a collisions index and a postprocess
	function could never match.
	NOTE(review): assumes the collision tables of such resources are keyed by
	the postprocessed form of the entry -- confirm against the data modules.
	]]
local function is_collision(x, template)
	local resource = resources[template]
	local collisions_index = resource[2]
	if not collisions_index then
		return false
	end

	local key = x
	local postprocess = resource[3]
	if postprocess then
		-- Assignment keeps only the first result of the postprocess call.
		key = postprocess(x)
	end

	local collisions = mw.loadData("Module:R:Perseus/collision-data/" .. collisions_index)
	return collisions[key] == true
end

--[[ Builds the Perseus URL for an entry.

	beta_or_latin -- entry code as used by Perseus
	template      -- key into `resources`
	redirect      -- truthy: link to the "resolveform" lookup page and append
	                 "&lang=<language>"; falsy: link directly to the entry in
	                 the resource's text

	When the resource defines a postprocess function (field 3), the entry is
	passed through it and the resource suffix (field 4, if any) is appended;
	otherwise the entry is used unchanged. For the Harpocration template the
	direct-entry URL also carries a ":letter=" part built from the upper-cased
	first byte of the diacritic-stripped entry.
	]]
local function format_perseus_url(beta_or_latin, template, redirect)
	local letter_part = ""
	if template == 'Harpocration' then
		local initial = string.sub(remove_diacritics(beta_or_latin), 1, 1)
		letter_part = ":letter=" .. string.upper(initial)
	end

	local entry = resources[template]
	local resource_id = entry[1] or ''

	local prefix, suffix
	if redirect then
		prefix = 'https://www.perseus.tufts.edu/hopper/resolveform?type=exact&lookup='
		suffix = '&lang=' .. get_language(template)
	else
		prefix = 'https://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:'
				.. resource_id .. letter_part .. ':entry='
		suffix = ''
	end

	local query = beta_or_latin
	local transform = entry[3]
	if transform ~= nil then
		query = transform(beta_or_latin) .. (entry[4] or '')
	end

	return prefix .. query .. suffix
end

-- True when the best script found for `text` by the language object with
-- code "grc" has the script code "Polyt".
local function is_polytonic(text)
	local ancient_greek = m_languages.getByCode("grc")
	local best_script = ancient_greek:findBestScript(text)
	return best_script:getCode() == "Polyt"
end

--[[ Produces the leading part of the citation: a quoted external link to the
	Perseus entry followed by ", in ".

	title         -- text shown as the link label
	beta_or_latin -- entry code passed to format_perseus_url; when it is the
	                 empty string no link is produced, the "no Perseus link"
	                 tracking key is recorded and the tracker's result is
	                 returned instead
	template      -- key into `resources`
	redirect      -- forwarded to format_perseus_url

	The label is tagged according to the resource language: Greek titles go
	through tag_greek (recording "wrong-script" first if the title is not
	polytonic), Latin titles through tag_latin; any other language leaves the
	title untouched.
	]]
local function format_perseus_wikilink(title, beta_or_latin, template, redirect)
	local language = get_language(template)
	local label = title

	if language == 'greek' then
		if not is_polytonic(title) then
			-- [[Special:WhatLinksHere/Wiktionary:Tracking/R:Perseus/wrong-script]]
			track('wrong-script')
		end
		label = tag_greek(title)
	elseif language == 'latin' then
		label = tag_latin(title)
	end

	if beta_or_latin == '' then
		local placeholder = track('no Perseus link')
		if placeholder then
			return placeholder
		end
	end

	local url = format_perseus_url(beta_or_latin, template, redirect)
	return '“[' .. url .. ' ' .. label .. ']”, in '
end

--[[ Template entry point: builds the Perseus link that starts the citation.

	frame -- the #invoke frame; arguments and the template name are read from
	         its parent (the calling template).

	The resource is chosen from the calling template's title with the
	"Template:R:" prefix and any "/sandbox" suffix removed.

	Non-Greek resources: parameter 1 is the displayed word and parameter 2 the
	Perseus entry code; both default to the page title.

	Greek resources: parameter 2, if given, must be a polytonic Greek word.
	Parameter 1 is taken as the word when it is polytonic and as the Perseus
	entry code otherwise. A missing word falls back to the page title (or to a
	sample word in the Template namespace); a missing code is derived from the
	word with beta().

	When neither parameter is given and the word is a known collision, the link
	goes through the Perseus lookup page instead of the direct entry.

	Returns the link text, or "" when the word equals the template name.
	Raises an error for an unknown template or inconsistent parameters.
	]]
function export.create(frame)
	local params = {
		[1] = {}, -- Perseus code or word
		[2] = {}, -- word; only for Greek templates?
		["page"] = {},
		["pages"] = {},
		["pageref"] = {},
		["column"] = {},
		["columns"] = {},
		["passage"] = {},
		["author"] = {}
	}
	local parent = frame:getParent()
	local args = m_params.process(parent.args, params)
	
	local template = string.gsub(parent:getTitle(), "^Template:R:", "")
	template = string.gsub(template, "/sandbox$", "")
	-- Fail with a readable message instead of an "attempt to index nil" error
	-- from get_language when the caller is not one of the supported templates.
	if resources[template] == nil then
		error('Module:R:Perseus has no resource entry for template "R:' .. template .. '".')
	end
	-- "latin" here means "not Greek": it also covers the 'non' resource.
	local latin = not (get_language(template) == 'greek')
	
	if not latin and args[2] and not args[1] then
		error('Parameter 2 should be placed in parameter 1.')
	end
	
	local title = mw.title.getCurrentTitle()
	
	local word, perseus_code
	if latin then
		word = args[1] or title.text
		perseus_code = args[2] or title.text
	else
		word = args[2]
		
		if word and not is_polytonic(word) then
			error('Second parameter of {{[[Template:R:' .. template .. '|R:' .. template ..
					']]}} should be a Greek word.')
		end
		
		if args[1] then
			if is_polytonic(args[1]) then
				-- A Greek first parameter is the word itself, unless the word
				-- was already supplied as parameter 2.
				if not word then
					word = args[1]
				else
					error('Second parameter of {{[[Template:R:' .. template .. '|R:' .. template ..
							']]}} is a Greek word, so first parameter must be Perseus entry code.')
				end
			else
				perseus_code = args[1]
			end
		end
		
		if not word then
			local pagename = title.text
			if is_polytonic(pagename) then
				word = pagename
			elseif title.nsText == "Template" then
				-- Placeholder so the template's own page renders without error.
				word = 'λέξις'
			else
				error('{{[[Template:R:' .. template .. '|R:' .. template ..
						']]}} needs manual input: pagename is not Greek.')
			end
		end
		
		if not perseus_code then
			perseus_code = beta(word)
		end
	end
	
	-- Collision redirects only apply when the editor supplied no parameters.
	local redirect = not (args[1] or args[2]) and is_collision(word, template)
	
	if word == template then
		return ""
	else
		return format_perseus_wikilink(word, perseus_code, template, redirect)
	end
end

return export