-- Module:Formatnum
-- From Meta, a Wikimedia project coordination wiki (permanently protected module).
-- This module is necessary for Module:Wikidata
local p = {}

-- frequently used functions
local floor = math.floor
local strLen = string.len
local strRep = string.rep
local strFind = string.find
local strSub = string.sub
local strGsub = string.gsub

local textSub = mw.ustring.sub
local textRep = mw.ustring.rep
local textLen = mw.ustring.len

local isSupportedLanguage = mw.language.isSupportedLanguage
local isKnownLanguageTag = mw.language.isKnownLanguageTag
local getLanguage = mw.getLanguage

-- Digit substitution tables for language tags whose native digits are not produced by
-- mw.language:formatNum() in the core Lua libraries for MediaWiki.
-- Each table maps an ASCII digit character ('0'..'9') to its localized (UTF-8) glyph, so that
-- the replacement can be done with the fast byte-oriented string.gsub().
local digits
do
    -- Build an ASCII-digit -> glyph map from a list of ten glyphs ordered from zero to nine.
    local function digitMap(glyphs)
        local map = {}
        for position, glyph in ipairs(glyphs) do
            map[tostring(position - 1)] = glyph
        end
        return map
    end

    digits = {
        ['ml-Mlym'] = digitMap{ '൦', '൧', '൨', '൩', '൪', '൫', '൬', '൭', '൮', '൯' },
        ['mn-Mong'] = digitMap{ '᠐', '᠑', '᠒', '᠓', '᠔', '᠕', '᠖', '᠗', '᠘', '᠙' },
        ['ta'] = digitMap{ '௦', '௧', '௨', '௩', '௪', '௫', '௬', '௭', '௮', '௯' },
        ['te'] = digitMap{ '౦', '౧', '౨', '౩', '౪', '౫', '౬', '౭', '౮', '౯' },
        ['th'] = digitMap{ '๐', '๑', '๒', '๓', '๔', '๕', '๖', '๗', '๘', '๙' },
    }
end

-- Format a number as text, optionally localized for a MediaWiki language.
--
-- Parameters:
--   number   value to format; anything tonumber() cannot convert is returned unchanged.
--   lang     language tag (may be nil). When mw.language.isKnownLanguageTag(lang) is true, digits and
--            separators come from that language's formatNum(); otherwise the output is ASCII only.
--            Tags listed in the local 'digits' table additionally get their ASCII digits replaced.
--   prec     requested number of decimals (number or numeric string). It is floored, discarded when
--            negative and capped at 14. When it is nil, 0 or not numeric, decimals are left unchanged.
--            Missing decimals are padded with zeros; decimals in excess are rounded.
--   compact  when truthy, the localized output is produced without grouping separators.
--
-- Returns the formatted string (or the original 'number' argument when it is not a valid number).
function p.formatNum(number, lang, prec, compact)
    -- Do not alter the specified value when it is not a valid number, return it as is
    local value = tonumber(number)
    if value == nil then
        return number
    end
    -- Basic ASCII-only formatting (without paddings)
    number = tostring(value)
    -- Split off the exponent, if any (incorrectly managed in mw.language:formatNum() and even
    -- forgotten due to an internal bug, e.g. in Hindi); 'number' keeps only the mantissa
    local exponent = ''
    local pos = strFind(number, '[Ee]')
    if pos ~= nil then
        exponent = strSub(number, pos + 1)
        number = strSub(number, 1, pos - 1)
    end
    -- Check the precision requested
    prec = tonumber(prec) -- nil if not specified as a true number
    if prec ~= nil then
        prec = floor(prec)
        if prec < 0 then
            prec = nil -- discard an incorrect precision (not a positive integer)
        elseif prec > 14 then
            prec = 14 -- maximum precision supported by tostring(number)
        end
    end
    -- Infinities and NaN have no decimals: never pad them (this avoids results such as 'inf.00')
    if value ~= value or value == math.huge or value == -math.huge then
        prec = nil
    end
    -- Preprocess the precision in the ASCII string
    local dot = '.'
    if (prec or 0) > 0 then
        pos = strFind(number, dot, 1, true) -- plain search, no pattern
        if pos ~= nil and strLen(number) - pos > prec then
            -- More decimals than requested: round the mantissa to 'prec' decimals. Converting back
            -- through tonumber()/tostring() drops trailing zeros, which are padded again below
            -- (the localized formatter would drop them anyway).
            number = tostring(tonumber(string.format('%.' .. prec .. 'f', tonumber(number))))
            pos = strFind(number, dot, 1, true)
        end
        if pos ~= nil then
            prec = pos + prec - strLen(number) -- number of trailing zeros still to add
            dot = '' -- already present
        --else dot and precision padding must be added
        end
    else
        dot = '' -- don't add dot
        prec = 0 -- don't alter the precision
    end
    if lang ~= nil and isKnownLanguageTag(lang) == true then
        -- Convert number to localized digits, decimal separator, and group separators
        local language = getLanguage(lang) -- caveat: can load localized resources for up to 20 languages
        if compact then
            number = language:formatNum(tonumber(number), { noCommafy = 'y' })
        else
            number = language:formatNum(tonumber(number))
        end
        -- Postprocessing the precision: pad with localized zeros
        if prec > 0 then
            local sample = language:formatNum(1.04) -- format a non-integer constant
            if dot ~= '' then -- only if adding the decimal separator is required
                dot = textSub(sample, 2, 2) -- decimal separator of the formatted constant
            end
            local zero = textSub(sample, 3, 3) -- first decimal of the formatted constant
            number = number .. dot .. textRep(zero, prec)
        end
        -- Append the localized base-10 exponent without grouping separators
        -- (there's no reliable way to detect a localized leading symbol 'E')
        if exponent ~= '' then
            number = number .. 'E' .. language:formatNum(tonumber(exponent), { noCommafy = 'y' })
        end
    else -- not localized, ASCII only
        -- Postprocessing the precision: pad with ASCII zeros
        if prec > 0 then
            number = number .. dot .. strRep('0', prec)
        end
        -- Append the base-10 exponent without grouping separators
        if exponent ~= '' then
            number = number .. 'E' .. exponent
        end
    end
    -- Special cases for substitution of ASCII digits (missing support in Lua core libraries for some
    -- languages). A single pass with a table replacement is enough: digits missing from the table
    -- are kept as is, and 'mw.ustring' is not needed because only ASCII bytes are matched.
    local substitutes = lang ~= nil and digits[lang]
    if substitutes then
        number = strGsub(number, '[0-9]', substitutes)
    end
    return number
end

-- Frame whose arguments are used by this module; resolved on first use and then reused.
local _parentFrame
-- Return the argument table of the parent of 'frame', or of 'frame' itself when it has no parent.
-- The frame is resolved only once (frame:getParent() is costly); later calls return the
-- arguments of that same frame whatever 'frame' they are given.
local function getArgs(frame)
    if _parentFrame == nil then
        _parentFrame = frame:getParent() or frame
    end
    return _parentFrame.args
end

-- Result of expanding {{PAGELANGUAGE}}; computed on first use and then reused.
local _pageLang
-- Return the expansion of the {{PAGELANGUAGE}} magic word in the context of 'frame'
-- (an empty string when the expansion yields nothing). frame:preprocess() is costly,
-- so the result of the first call is kept and returned by all later calls.
local function getPageLang(frame)
    if _pageLang ~= nil then
        return _pageLang
    end
    _pageLang = frame:preprocess('{{PAGELANGUAGE}}') or ''
    return _pageLang
end

-- Entry point for wikitext ({{#invoke:...|main}}): reads the arguments returned by getArgs(frame)
-- and returns the result of p.formatNum().
--
-- Arguments:
--   number or 1   the value to format
--   lang or 2     language code; also accepts the compatibility values 'arabic-indic' (mapped to 'fa'),
--                 'ml-old' (mapped to 'ml-Mlym') and 'R' (raw, not localized)
--   prec          requested number of decimals
--   sep           when not empty, the compact form of p.formatNum() is requested
function p.main(frame)
    local args = getArgs(frame)
    local prec    = args.prec or ''
    local sep     = args.sep or ''
    local number  = args.number or args[1] or ''
    local lang    = args.lang or args[2] or ''
    -- Positional parameters keep their surrounding whitespace in wikitext: strip it, otherwise a value
    -- such as ' fr ' or ' R ' would not be recognized and would silently fall back to the page language
    if type(lang) == 'string' then
        lang = mw.text.trim(lang)
    end
    -- validate the language parameter within MediaWiki's caller frame
    if lang == 'arabic-indic' then -- only for compatibility ('arabic-indic' is not a SupportedLanguage)
        lang = 'fa' -- better support than 'ur', 'ks' or 'ps'
    elseif lang == 'ml-old' then -- only for compatibility ('ml-old' is not a SupportedLanguage)
        lang = 'ml-Mlym'
    elseif lang == 'R' then -- only for compatibility ('R' is not a SupportedLanguage)
        lang = nil -- not localized (raw)
    elseif lang == '' or not isSupportedLanguage(lang) then
        -- Note that 'SupportedLanguages' are not necessarily 'BuiltinValidCodes', and so they are not necessarily
        -- 'KnownLanguages' (with a language name defined at least in the default localisation of the local wiki).
        -- But they all are ValidLanguageCodes (suitable as Wiki subpages or identifiers: no slash, colon, HTML tags, or entities)
        -- In addition, they do not contain any capital letter in order to be unique in page titles (a restriction that does
        -- not exist in BCP47), but they may violate the standard format of BCP47 language tags for specific needs in MediaWiki.
        -- Empty/unspecified and unsupported languages are replaced here by the language of the page
        -- (the expansion of {{PAGELANGUAGE}}).
        lang = getPageLang(frame)
    end
    return p.formatNum(number, lang, prec, sep ~= '')
end

return p