Module:Sandbox/AbstractWikipedia/Functions

From Meta, a Wikimedia project coordination wiki
Module documentation

This is the functions module of the Abstract Wikipedia template-renderer prototype.

You can define new functions here to be used in the template language. A function invoked directly in a template's slot should return a lexeme as defined in the lexemes module.

Some remarks:

Current language-specific implementations:


-- Table of exported functions; it is returned at the end of the module.
local p = {}

-- Sibling modules of the prototype: "l" builds lexemes (l.newLexeme), "te"
-- evaluates templates (te.evaluateTemplate) and "wd" provides the data
-- look-ups used by the functions below.
local l = require("Module:Sandbox/AbstractWikipedia/Lexemes")
local te = require("Module:Sandbox/AbstractWikipedia/TemplateEvaluator")
local wd = require("Module:Sandbox/AbstractWikipedia/Wikidata")

-- It is assumed that the global variable "language" holds the language code
-- used for rendering. It is intentionally assigned without "local".
-- NOTE(review): presumably other modules of the prototype read or overwrite
-- this value and "he" is only a default -- confirm.

language = "he"

-- The following is a list of language-agnostic functions to be supported by the
-- template language. Language-specific implementations can be found in the
-- Module:Sandbox/AbstractWikipedia/Functions/xx where xx is the language code.

-- Build a "numeral" lexeme for a cardinal number.
-- The argument is expected to be a string holding the number.
function p.Cardinal ( number )
	local numeral = l.newLexeme ( number, "numeral")
	-- Naive agreement rule: exactly 1 is singular, everything else is plural.
	-- This only works for some languages.
	local grammatical_number = "plural"
	if tonumber(number) == 1 then
		grammatical_number = "singular"
	end
	numeral.addFeature("number", grammatical_number)
	numeral.addForm ( tostring(number), {} )
	return numeral
end

-- Wrap a piece of text in a lexeme. When no text_type is supplied, the
-- lexeme is created with the type "text".
function p.TemplateText ( text, text_type )
	if not text_type then
		text_type = "text"
	end
	return l.newLexeme( text, text_type)
end

-- Helper: expands every Q-id in the list wdFeatures through wd.expandFeature
-- and adds each resulting (category, feature) pair to the target, which may
-- be either a form or a lexeme.
local function addFeaturesToForm (form, wdFeatures)
	for _, q_id in ipairs(wdFeatures) do
		local expanded = wd.expandFeature(q_id)
		for category, value in pairs(expanded) do
			form.addFeature(category, value)
		end
	end
end

-- Build a lexeme from the demonym data of an item.
-- Raises an error when the item has no demonyms.
function p.Demonym ( q_id )
	local demonyms = wd.getDemonyms(q_id)
	if #demonyms == 0 then
		error("No demonyms for "..q_id.." in language "..language)
		-- Use some fallback here?
	end
	local first = demonyms[1]
	-- A demonym is specified either by a link to a lexeme or by listing all
	-- of its forms; the link, when present on the first entry, wins.
	if first.lexeme then
		return p.Lexeme(first.lexeme)
	end
	-- Otherwise assemble an adjective with one form per demonym entry.
	local adjective = l.newLexeme(first.label , "adjective")
	for _, entry in ipairs(demonyms) do
		addFeaturesToForm(adjective.addForm(entry.label), entry.features)
	end
	adjective.log()
	return adjective
end

-- Transforms a lexeme from Wikidata into the internal representation.
-- lexeme_id: ID of the Wikidata lexeme to load.
-- ...: optional Q-ids of features which act as extra constraints; they are
--      expanded and added to the lexeme itself.
-- Returns the constructed lexeme, holding one form for every Wikidata form
-- that has a representation in the language code reported by wd.getLemma.
function p.Lexeme (lexeme_id, ...)
	-- Capture the varargs explicitly: the implicit "arg" table is a
	-- deprecated compatibility feature and must not be relied upon.
	local constraints = { ... }
	local wdLexeme = mw.wikibase.getEntity( lexeme_id )
	local lemma, used_language = wd.getLemma(wdLexeme, lexeme_id)
	local lexeme = l.newLexeme(lemma , wd.getPOS(wdLexeme))
	local grammatical_gender = wd.getGrammaticalGender(lexeme_id)
	if grammatical_gender then
		lexeme.addFeature("gender", grammatical_gender)
	end
	-- More statements may need to be fetched here

	-- Add any extra constraints passed as arguments
	addFeaturesToForm(lexeme, constraints)

	-- Ingest forms. All per-form variables are local so that nothing leaks
	-- into the global environment.
	for _, wdForm in ipairs(wdLexeme:getForms()) do
		-- We only want forms of one language code
		local spelling = wdForm:getRepresentation(used_language)
		if spelling then
			local form = lexeme.addForm(spelling, {})
			addFeaturesToForm(form, wdForm:getGrammaticalFeatures())
		end
	end
	lexeme.log()
	return lexeme
end

-- Creates a compound lexeme out of two lexemes (typically nouns), which
-- should be the gendered versions of the same lemma (e.g. German Arzt/Ärztin).
-- masculine_lexeme_id, feminine_lexeme_id: IDs of the two Wikidata lexemes.
-- ...: optional Q-ids of features which act as extra constraints; they are
--      expanded and added to the compound lexeme itself.
-- Raises an error if the two lexemes do not share the same language code or
-- the same part-of-speech.
function p.GenderedLexeme ( masculine_lexeme_id, feminine_lexeme_id, ...)
	-- Capture the varargs explicitly: the implicit "arg" table is a
	-- deprecated compatibility feature and must not be relied upon.
	local constraints = { ... }
	local mascLexeme = mw.wikibase.getEntity( masculine_lexeme_id )
	local femLexeme = mw.wikibase.getEntity( feminine_lexeme_id )
	local mascLemma, used_language = wd.getLemma(mascLexeme, masculine_lexeme_id)
	local femLemma, used_language2 = wd.getLemma(femLexeme, feminine_lexeme_id)
	if used_language ~= used_language2 then
		error("Lexemes "..mascLemma.." and "..femLemma.." don't use the same language code")
	end
	local pos = wd.getPOS(mascLexeme)
	if pos ~= wd.getPOS(femLexeme) then
		error("Lexemes "..mascLemma.." and "..femLemma.." don't have the same part-of-speech")
	end
	-- Keep the result local so it does not leak into the global environment.
	local lexeme = l.newLexeme(mascLemma.."/"..femLemma , pos)
	-- Add any extra constraints passed as arguments
	addFeaturesToForm(lexeme, constraints)
	-- Note: pairs() does not guarantee which of the two genders is ingested
	-- first.
	for gender, wdLexeme in pairs{ masculine = mascLexeme, feminine = femLexeme } do
		for _, wdForm in ipairs(wdLexeme:getForms()) do -- ingest forms
			-- We only want forms of one language code
			local spelling = wdForm:getRepresentation(used_language)
			if spelling then
				local form = lexeme.addForm(spelling)
				addFeaturesToForm(form, wdForm:getGrammaticalFeatures())
				-- A form that carries no gender feature of its own gets
				-- the gender of the lexeme it was taken from.
				if not form.getFeatureIndex("gender") then
					form.addFeature("gender", gender)
				end
			end
		end
	end
	lexeme.log()
	return lexeme
end


-- Builds a singular "noun" lexeme for a person, using the item's label.
-- The grammatical gender is derived from the person's social gender, and
-- dead people additionally receive a past "nominal_tense" feature.
function p.Person (q_id)
	local person = l.newLexeme ( wd.getLabel( q_id ), "noun")
	person.addFeature("number", "singular")

	-- Only the binary genders are mapped here. Handling of non-binary
	-- gender is language dependent and would have to be done in a
	-- language-specific implementation.
	local binary_genders = { masculine = true, feminine = true }
	local gender = wd.getHumanGender(q_id)
	if binary_genders[gender] then
		person.addFeature("gender", gender)
	end

	-- Dead people are normally spoken about in the past tense, so their
	-- lexeme gets a past tense feature. This can be exposed to the verb by
	-- using the "tsubj relation".
	if wd.isDead(q_id) then
		person.addFeature("nominal_tense", "past")
	end

	person.log()
	return person
end

-- Fetches the label of an entity and returns it as a lexeme.
-- To allow reverse look-up of lexemes from items, I have used the
-- "literal translation" property (P2441) qualified by "lexeme sense" (P7018)
-- to point to language-specific lexemes. See discussion in:
-- https://phabricator.wikimedia.org/T320263#8341702
-- These lexemes are stored in the gendered.unspecified.lexeme field (if no
-- gendered pairs are given in "male form of label" (P3321) or
-- "female form of label" (P2521)).
-- Resolution order: a pair of gendered lexemes, then a single gendered
-- lexeme, then the unspecified lexeme, and finally a plain "noun" lexeme
-- built from the item's label plus any gendered labels.
function p.Label (q_id)
	-- We disable the following check, since it requires an expensive call
	--[[ if wd.isHuman(q_id) then 
		return p.Person(q_id)
	end
	]]--
	local gendered = wd.getGenderedLabels(q_id)
	if (gendered.male.lexeme and gendered.female.lexeme) then
		return p.GenderedLexeme(gendered.male.lexeme, gendered.female.lexeme)
	elseif (gendered.male.lexeme or gendered.female.lexeme) then
		return p.Lexeme(gendered.male.lexeme or gendered.female.lexeme)
	elseif (gendered.unspecified.lexeme) then
		return p.Lexeme(gendered.unspecified.lexeme)
	end

	-- No lexeme is linked: fall back to the plain labels. The result is kept
	-- local so it does not leak into the global environment.
	local lexeme = l.newLexeme(wd.getLabel(q_id) , "noun")
	-- NOTE(review): the features are passed here as a {category, feature}
	-- list, while other call sites use addFeature(category, feature) --
	-- confirm that addForm accepts this shape.
	if (gendered.male.label) then
		lexeme.addForm(gendered.male.label, {"gender", "masculine"})
	end
	if (gendered.female.label) then
		lexeme.addForm(gendered.female.label, {"gender", "feminine"})
	end
	return lexeme
end

-- Example of using a sub-template as a function: the template is evaluated
-- with "num" and "noun" bound to the given arguments.
function p.QuantifiedNoun(num, noun)
	local template = "{nummod:Cardinal(num)} {root:noun}"
	local arguments = { num = num, noun = noun}
	return te.evaluateTemplate(template, arguments)
end

-- Invokes either the Person function or the Pronoun function, depending on
-- pronominalize (the string "true" selects the pronoun).
-- If neither a q_id nor pronominalize are set, it will return an empty dummy
-- noun.
-- NOTE(review): "functions" is not defined in this file; presumably it is a
-- global table provided elsewhere that also contains the language-specific
-- implementations (e.g. Pronoun) -- confirm.
function p.PersonOrPronoun(q_id, pronominalize)
	local wants_pronoun = (pronominalize == "true")
	-- Guard against an unset q_id: calling q_id:match on nil would raise
	-- instead of falling through to the pronoun / dummy noun branches.
	if q_id and q_id:match("^Q%d+") then
		if wants_pronoun then
			return functions.Pronoun(q_id)
		end
		return functions.Person(q_id)
	end
	if wants_pronoun then
		return functions.Pronoun()
	end
	return l.newLexeme("", "noun")
end

-- Generates a generic date expression of the form day.month.year
-- date: table with optional fields "day", "month" and "year".
-- A day is included only if it is a number greater than 0, a month only if
-- it is a number between 1 and 12, and a year unless it is the number 0.
-- Returns a "noun" lexeme holding the included elements joined by ".".
-- TODO: This should really use CLDR to get a language-specific expression
function p.Date ( date )
	local elements = {}
	-- Convert once and test for nil: a present but non-numeric value (e.g.
	-- an empty string) makes tonumber return nil, and comparing nil with a
	-- number would raise an error.
	local day = tonumber(date.day)
	if day and day > 0 then
		table.insert(elements, tostring(date.day))
	end
	local month = tonumber(date.month)
	if month and month > 0 and month <= 12 then
		table.insert(elements, tostring(date.month))
	end
	-- Only a year equal to 0 is dropped; a non-numeric year is kept
	-- verbatim.
	if date.year and tonumber(date.year) ~= 0 then
		table.insert(elements, tostring(date.year))
	end
	local result = table.concat(elements, '.')
	return l.newLexeme (result, "noun")
end

-- Generates a generic ordinal expression: the number followed by a period,
-- wrapped in an "adjective" lexeme.
-- TODO: This should really use CLDR to get a language-specific expression
function p.Ordinal ( number )
	local text = tostring(number) .. '.'
	return l.newLexeme(text, "adjective")
end

-- Export the table of functions defined above.
return p