-- Module:AutosortTable
--
-- From Meta, a Wikimedia project coordination wiki
-- Module documentation
--[[
AutosortTable: Creates a table which is automatically sorted

Usage: (Remove the hidden comments before use)

{{#invoke: AutosortTable|create
|class = wikitable <!-- Class for the entire table -->
|style = width:50% <!-- CSS for the entire table -->
|sep = --          <!-- Separator used to prefix cells, such as '--' or '!!' (cannot use any '|' or '=') -->
|order = 2, 1      <!-- Order for sorting preference, takes a comma-separated list of column numbers -->
|numeric = 2       <!-- Columns which use numeric sorting. Takes a comma-separated list of column numbers -->
|grpsep = ,        <!-- Group separator in numeric values (defaults to ','), to disambiguate decimal separator -->
|valsep = !        <!-- Sortkey separator before displayed text in a cell, such as '!' (cannot use any '|' or '=') -->
|header = -- Name -- Age                   <!-- Table header (uses sep) -->
|colstyle = -- -- text-align:right         <!-- CSS styles for cells in each column (uses sep) -->
| -- Bob -- 20                             <!-- Row 1 (uses sep) -->
| -- Peter -- 35                           <!-- Row 2 (uses sep) -->
| -- John -- 35.1!35                       <!-- Row 3 (uses sep), sorted as 35.1, displayed as 35 -->
| -- James -- 50                           <!-- Row 4 (uses sep) -->
| background-color: #FFDDDD -- Henry -- 45 <!-- Row 5 (uses sep), with CSS for the whole row before the two cells -->
}}

]]

-- Table of functions exported by this module; it is returned at the end of the file.
local _module = {}

-- Frequently-used functions, cached in locals so that they are not looked up
-- through their library tables on every call.
-- str*  : functions of the standard Lua string library.
-- text* : functions of the mw.ustring and mw.text libraries.
-- htmlCreate : mw.html.create, used to build the output markup.
local strGsub = string.gsub
local strMatch = string.match
local textIndexOf = mw.ustring.find
local textSub = mw.ustring.sub
local textGsub = mw.ustring.gsub
local textChar = mw.ustring.char
local textSplit = mw.text.split
local textTrim = mw.text.trim
local htmlCreate = mw.html.create

-- Data for building the numeric comparator function below: conversion to ASCII.
--
-- The result of this section is:
--   charSubst    : lookup table mapping one non-ASCII character to its ASCII equivalent;
--   classPattern : a pattern '[...]' matching exactly the characters that are keys of charSubst.
-- Both are meant to be used together as textGsub(text, classPattern, charSubst).
--
-- All non-ASCII characters are given by code point and converted with textChar(), so that
-- the data does not depend on how literal characters survive editing or normalization of
-- this source file, and so that every substituted character is guaranteed to be in the
-- class (a character missing from the class is never looked up in charSubst).

-- Punctuation, signs and spaces: { code point, ASCII replacement }.
-- ASCII space, comma, hyphen-minus and full stop need no conversion (see grpsep for the
-- interpretation of ',' and '.').
local punctuation = {
	{ 0x00A0, ' ' }, -- U+00A0 (NO-BREAK SPACE)
	{ 0x202F, ' ' }, -- U+202F (NARROW NO-BREAK SPACE)
	{ 0x2212, '-' }, -- U+2212 (MINUS SIGN)
	{ 0x3000, ' ' }, -- U+3000 (IDEOGRAPHIC SPACE)
	{ 0x3001, ',' }, -- U+3001 (IDEOGRAPHIC COMMA)
	{ 0x3002, '.' }, -- U+3002 (IDEOGRAPHIC FULL STOP)
	{ 0xFE50, ',' }, -- U+FE50 (SMALL COMMA) -- a comma, so it maps to ',' and not to '.'
	{ 0xFE51, ',' }, -- U+FE51 (SMALL IDEOGRAPHIC COMMA)
	{ 0xFE52, '.' }, -- U+FE52 (SMALL FULL STOP)
	{ 0xFE62, '+' }, -- U+FE62 (SMALL PLUS SIGN)
	{ 0xFE63, '-' }, -- U+FE63 (SMALL HYPHEN-MINUS)
	{ 0xFF07, "'" }, -- U+FF07 (FULLWIDTH APOSTROPHE)
	{ 0xFF0B, '+' }, -- U+FF0B (FULLWIDTH PLUS SIGN)
	{ 0xFF0C, ',' }, -- U+FF0C (FULLWIDTH COMMA)
	{ 0xFF0D, '-' }, -- U+FF0D (FULLWIDTH HYPHEN-MINUS)
	{ 0xFF0E, '.' }, -- U+FF0E (FULLWIDTH FULL STOP)
	{ 0xFF61, '.' }, -- U+FF61 (HALFWIDTH IDEOGRAPHIC FULL STOP)
	{ 0xFF64, ',' }, -- U+FF64 (HALFWIDTH IDEOGRAPHIC COMMA)
}

-- Code point of the digit zero of each non-ASCII decimal digit set; the nine following
-- code points are the digits one to nine of the same set.
local zeroes = { -- Source: https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt
	--0x0030, -- 0030..0039 ; Decimal # Nd [10] DIGIT ZERO..DIGIT NINE
	0x0660, -- 0660..0669 ; Decimal # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
	0x06F0, -- 06F0..06F9 ; Decimal # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
	0x07C0, -- 07C0..07C9 ; Decimal # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
	0x0966, -- 0966..096F ; Decimal # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
	0x09E6, -- 09E6..09EF ; Decimal # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
	0x0A66, -- 0A66..0A6F ; Decimal # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
	0x0AE6, -- 0AE6..0AEF ; Decimal # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
	0x0B66, -- 0B66..0B6F ; Decimal # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
	0x0BE6, -- 0BE6..0BEF ; Decimal # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
	0x0C66, -- 0C66..0C6F ; Decimal # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
	0x0CE6, -- 0CE6..0CEF ; Decimal # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
	0x0D66, -- 0D66..0D6F ; Decimal # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
	0x0DE6, -- 0DE6..0DEF ; Decimal # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
	0x0E50, -- 0E50..0E59 ; Decimal # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
	0x0ED0, -- 0ED0..0ED9 ; Decimal # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
	0x0F20, -- 0F20..0F29 ; Decimal # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
	0x1040, -- 1040..1049 ; Decimal # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
	0x1090, -- 1090..1099 ; Decimal # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
	0x17E0, -- 17E0..17E9 ; Decimal # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
	0x1810, -- 1810..1819 ; Decimal # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
	0x1946, -- 1946..194F ; Decimal # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
	0x19D0, -- 19D0..19D9 ; Decimal # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
	0x1A80, -- 1A80..1A89 ; Decimal # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
	0x1A90, -- 1A90..1A99 ; Decimal # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
	0x1B50, -- 1B50..1B59 ; Decimal # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
	0x1BB0, -- 1BB0..1BB9 ; Decimal # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
	0x1C40, -- 1C40..1C49 ; Decimal # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
	0x1C50, -- 1C50..1C59 ; Decimal # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
	0xA620, -- A620..A629 ; Decimal # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
	0xA8D0, -- A8D0..A8D9 ; Decimal # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
	0xA900, -- A900..A909 ; Decimal # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
	0xA9D0, -- A9D0..A9D9 ; Decimal # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
	0xA9F0, -- A9F0..A9F9 ; Decimal # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
	0xAA50, -- AA50..AA59 ; Decimal # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
	0xABF0, -- ABF0..ABF9 ; Decimal # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
	0xFF10, -- FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
	0x104A0, -- 104A0..104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
	0x10D30, -- 10D30..10D39 ; Decimal # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
	0x11066, -- 11066..1106F ; Decimal # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
	0x110F0, -- 110F0..110F9 ; Decimal # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE
	0x11136, -- 11136..1113F ; Decimal # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
	0x111D0, -- 111D0..111D9 ; Decimal # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
	0x112F0, -- 112F0..112F9 ; Decimal # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
	0x11450, -- 11450..11459 ; Decimal # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
	0x114D0, -- 114D0..114D9 ; Decimal # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
	0x11650, -- 11650..11659 ; Decimal # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
	0x116C0, -- 116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
	0x11730, -- 11730..11739 ; Decimal # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
	0x118E0, -- 118E0..118E9 ; Decimal # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
	0x11950, -- 11950..11959 ; Decimal # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
	0x11C50, -- 11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
	0x11D50, -- 11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
	0x11DA0, -- 11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
	0x16A60, -- 16A60..16A69 ; Decimal # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
	0x16B50, -- 16B50..16B59 ; Decimal # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
	0x1D7CE, 0x1D7D8, 0x1D7E2, 0x1D7EC, 0x1D7F6, -- 1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
	0x1E140, -- 1E140..1E149 ; Decimal # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
	0x1E2F0, -- 1E2F0..1E2F9 ; Decimal # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
	0x1E950, -- 1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
	0x1FBF0, -- 1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
	-- # Total code points: 650
}

-- classPattern is first collected as a list of class items, then joined into '[...]'.
local classPattern, charSubst = {}, {}
for _, entry in ipairs(punctuation) do
	local char = textChar(entry[1])
	classPattern[#classPattern + 1] = char -- single character item of the class
	charSubst[char] = entry[2]
end
for _, zero in ipairs(zeroes) do
	-- One range item 'zero-nine' in the class per digit set.
	classPattern[#classPattern + 1] = textChar(zero) .. '-' .. textChar(zero + 9)
	for digit = 0, 9 do
		charSubst[textChar(zero + digit)] = tostring(digit)
	end
end
classPattern = '[' .. table.concat(classPattern) .. ']'

-- Byte-oriented helpers used to locate the first number in a filtered cell.
local strFind = string.find
local strSub = string.sub

-- Computes the value used to compare one cell of a row.
-- cell:    the cell as produced by the row parser: a plain string (cell content only),
--          a table { sortKey, displayedText } (explicit sort key), or nil (missing cell).
-- numeric: truthy when the column is sorted numerically.
-- discard: pattern of the characters to remove from cell content before looking for a number.
-- Returns a number or nil for numeric columns, a string or nil otherwise.
local function sortValue(cell, numeric, discard)
    if type(cell) == 'table' then
        -- Use the explicit sort key (it should be prefiltered).
        if numeric then
            return tonumber(cell[1])
        end
        return cell[1]
    end
    if cell == nil then
        return nil
    end
    if not numeric then
        -- Cell content only: ignore the surrounding blanks, which are not displayed either
        -- and are also trimmed from explicit sort keys.
        return textTrim(cell)
    end
    -- Convert Unicode to ASCII, then filter blanks and group separators.
    local filtered = strGsub(textGsub(cell, classPattern, charSubst), discard, '')
    -- Find the first occurrence of a number and use it. Decimal points are allowed.
    -- Scientific notation is not supported.
    local intStart, intEnd = strFind(filtered, '%-?%d+')
    if not intStart then
        return nil -- no digit at all
    end
    -- A decimal number is used only when it is the first number of the cell, i.e. when it
    -- does not start after the first integer (in '10 (2.5)' the first number is 10).
    local decStart, decEnd = strFind(filtered, '%-?%d*%.%d+')
    if decStart and decStart <= intStart then
        return tonumber(strSub(filtered, decStart, decEnd))
    end
    return tonumber(strSub(filtered, intStart, intEnd))
end

-- Comparator factory for data-dependent sorting.
-- Uses a locale-dependent numeric format for numeric columns.
-- TODO: locale- and Unicode-dependent string collation for non-numeric.
--
-- orderLookup: sequence of column numbers, by decreasing sorting priority.
-- nsortLookup: set (column number -> true) of the columns sorted numerically.
-- descLookup:  set (column number -> true) of the columns sorted in descending order.
-- grpsep:      group separator of numeric values, discarded before converting to a number.
-- Returns a function(a, b) usable with table.sort on rows having 'data' and 'key' fields.
-- Missing values (nil) sort last in ascending order and first in descending order; rows
-- equal on all the columns of orderLookup are ordered by their 'key' field.
local function comparator(orderLookup, nsortLookup, descLookup, grpsep)
    -- The separator is inserted in a character class: escape its non-alphanumeric
    -- characters so that '-', '^', ']' or '%' are taken literally.
    local escapedSep = strGsub(grpsep or '', '%W', '%%%0')
    local discard = '[ \'+' .. escapedSep .. '_]' -- pattern for group separators or plus, to discard
    return function(a, b)
        local aCells, bCells = a.data, b.data
        for _, index in ipairs(orderLookup) do
            local numeric = nsortLookup[index]
            local aValue = sortValue(aCells[index], numeric, discard)
            local bValue = sortValue(bCells[index], numeric, discard)
            if aValue ~= bValue then
                if descLookup[index] then
                    return aValue == nil or bValue ~= nil and bValue < aValue
                else
                    return bValue == nil or aValue ~= nil and aValue < bValue
                end
            end
        end
        return a.key < b.key
    end
end

--[[
Entry point of {{#invoke: AutosortTable|create}}: builds an HTML table whose data rows
(the unnamed parameters) are sorted before being written.

frame: the invocation frame; its 'args' table provides the parameters:
  class, style : attributes of the whole table (optional)
  sep          : separator prefixing each cell of a row (defaults to '!!')
  valsep       : separator between the sort key and the displayed text of a cell (optional)
  grpsep       : group separator in numeric values (defaults to ',')
  order        : comma-separated list of column numbers, by sorting priority
  descending   : comma-separated list of the columns sorted in descending order
  numeric      : comma-separated list of the columns sorted numerically ('nsort' is
                 accepted as an alias)
  hidden       : comma-separated list of the columns not written to the output
  header, footer, colstyle : rows using the same syntax as the data rows
Returns the HTML of the table as a string.
]]
_module.create = function(frame)
    -- Named parameters
    local args = frame.args
    local class = args.class
    local style = args.style
    local sep = args.sep or '!!' -- required, must not be empty
    if sep == '' then sep = '!!' end -- an empty separator cannot delimit cells
    local valsep = args.valsep or '' -- optional, may be empty
    local grpsep = args.grpsep or ',' -- optional, use ',' by default
    local order = args.order or '' -- optional: without it, rows keep their input order
    local desc = args.descending or ''
    local nsort = args.numeric or args.nsort or ''
    local hidden = args.hidden or ''
    local header = args.header
    local footer = args.footer
    local colstyle = args.colstyle

    -- Lengths in characters (not bytes), because they are added to the character
    -- positions returned by textIndexOf and passed to textSub.
    local seplen = mw.ustring.len(sep)
    local valseplen = mw.ustring.len(valsep)

    -- Converts a comma-separated list into a sequence of numbers; items that are not
    -- numbers are skipped so that the sequence has no hole.
    local function columnList(list)
        local columns = {}
        for _, item in ipairs(textSplit(list, '%s*,%s*')) do
            local column = tonumber(item)
            if column then columns[#columns + 1] = column end
        end
        return columns
    end

    -- Converts a comma-separated list into a set: column number -> true.
    local function columnSet(list)
        local set = {}
        for _, column in ipairs(columnList(list)) do set[column] = true end
        return set
    end

    local orderLookup = columnList(order)
    local nsortLookup = columnSet(nsort)
    local descLookup = columnSet(desc)
    local hiddenLookup = columnSet(hidden)

    -- Create the table
    local html = htmlCreate()
    local htable = html:tag('table')
    if class then htable:attr('class', class) end
    if style then htable:attr('style', style) end

    --[[
    Parses a row string.
    s: the row, as 'optional CSS' followed by cells each prefixed by the separator.
    key: assigned to the result as a unique key so that equal rows do not cause sort errors.
    noSortKeys: if true, cells are never split on the value separator (used for CSS).
    Returns { key, css, sort, data } where data is the sequence of cells; a cell is
    either a string or a table { sort key, displayed text }.
    ]]
    local parse = function(s, key, noSortKeys)
        local css
        local firstSep = textIndexOf(s, sep, 1, true) -- true for matching a literal, not a pattern
        if firstSep then -- CSS before first separator
            css = textTrim(textSub(s, 1, firstSep - 1))
            if css == '' then css = nil end -- nothing before the separator: no CSS
            s = textSub(s, firstSep + seplen, -1)
        end
        -- Without any separator, the whole string is kept as the content of a single cell.
        local data = textSplit(s, sep, true) -- true for matching a literal, not a pattern
        -- detect sort values before the value separator
        -- (an empty value separator would be found at the start of every cell)
        if valseplen > 0 and not noSortKeys then
            for i, v in ipairs(data) do
                local valPos = textIndexOf(v, valsep, 1, true) -- true for matching a literal, not a pattern
                if valPos then
                    data[i] = { textTrim(textSub(v, 1, valPos - 1)), textSub(v, valPos + valseplen, -1) }
                end
            end
        end
        return { key = key, css = css, sort = s, data = data }
    end

    --[[
    Writes a row to the table.
    css: CSS to apply to the row.
    data: The data (cells) of the row
    _type: Can be 'header', 'footer' or nil.
    ]]
    local writeHtml = function(css, data, _type)
        local row = htable:tag('tr')
        if css then row:attr('style', textTrim(css)) end
        for i, v in ipairs(data) do
            if not hiddenLookup[i] then
                local cell
                if _type == 'header' then
                    -- Header: use the 'th' tag with scope="col"
                    cell = row:tag('th')
                    cell:attr('scope', 'col')
                elseif _type == 'footer' then
                    -- Footer: Mark as 'sortbottom' so that it does not sort when the table is made user-sortable
                    -- with the 'wikitable sortable' class
                    cell = row:tag('td')
                    cell:addClass('sortbottom')
                else
                    -- Ordinary cell (may have an optional sort-value, separate from the display value)
                    cell = row:tag('td')
                    local cellCss = colstyle and colstyle[i]
                    if cellCss then -- Apply the column styling, if necessary
                        cellCss = textTrim(cellCss)
                        if cellCss ~= '' then cell:attr('style', cellCss) end
                    end
                end
                if type(v) == 'table' then
                    cell:attr('data-sort-value', textTrim(v[1]))
                    v = v[2]
                end
                cell:wikitext(textTrim(v))
            end
        end
        return row
    end

    -- Parse the column styles (CSS only: a value separator such as '!' may occur in it)
    if colstyle then colstyle = parse(colstyle, -1, true).data end

    -- Write the header first
    if header then
        local headerData = parse(header)
        writeHtml(headerData.css, headerData.data, 'header')
    end

    -- Parse the data
    local data = {}
    for i, v in ipairs(args) do data[i] = parse(v, i) end

    -- Sorting with a comparator function
    -- (argument order: sort order, numeric columns, descending columns, group separator)
    table.sort(data, comparator(orderLookup, nsortLookup, descLookup, grpsep))

    -- Write the sorted data to the HTML output
    for _, v in ipairs(data) do
        writeHtml(v.css, v.data, nil)
    end

    -- Write the footer
    if footer then
        local footerData = parse(footer)
        writeHtml(footerData.css, footerData.data, 'footer')
    end

    return tostring(html)
end

return _module