Gadgets/HarvestLabels

From Meta, a Wikimedia project coordination wiki

More general info about Gadgets

This script adds an option to go over all Wikipedia pages that use a specific template and fill in missing labels in Wikidata.

How to use[edit]

  • Go to the page of one of the supported templates (see below for how to add support)
  • Press the "Add labels" link in the sidebar and enter the name of the template parameter to harvest
  • Wait a few seconds while a SPARQL query runs against Wikidata
  • A dialog will open showing an entity in Wikidata that is missing a label in your language, together with a suggestion based on the template. You can edit the label or keep the suggestion, then press save to add it to Wikidata. You can skip a label if you aren't sure.

How to set up[edit]

*[[d:{{{1}}}]]: [[d:{{{2}}}]] - [[{{{3}}}]] ([https://query.wikidata.org/#{{urlencode:SELECT ?item ?sitelink ?itemLabel where {?item wdt:{{{1}}} wd:{{{2}}} . ?sitelink schema:about ?item . ?sitelink schema:inLanguage 'en' . SERVICE wikibase:label{bd:serviceParam wikibase:language "en"} }|PATH}} Query])
(replace "en" with the relevant language code)
{{/row|P31|Q11424|Template:Infobox_film}}
{{/row|P31|Q5|Infobox_person}}

(This tells the script that pages using en:Template:Infobox_person have related entities whose d:Property:P31 is d:Q5.)

Script source code[edit]

/*
Harvest labels
=====================
The script queries Wikidata for missing labels, uses infoboxes to suggest labels, and allows users to easily add them.

Author: [[:he:User:ערן]]
*/
if (mw.config.get('wgNamespaceNumber') === 10) mw.loader.using( [ 'oojs-ui-windows', 'wikibase.api.RepoApi', 'wikibase.client.getMwApiForRepo' ], function(){
// Title of the wiki page whose wikitext is parsed to learn which templates are supported
// and which Wikidata property/value pair identifies the entities related to each template.
var INFOBOX_TO_ENTITIES_MAP_PAGE = 'ויקיפדיה:ויקינתונים/תבניות מידע';// this lists all templates that support it and how to find related entities in Wikidata
// NOTE(review): `lang` does not appear to be read anywhere else in this script - confirm before removing.
var lang='en'
// Register the interface messages used by the gadget: Hebrew for users whose interface
// language is Hebrew, English for everybody else.
//
// 'qlabel-invoke' is the tooltip of the sidebar link; it was read by the link-creation code
// but never defined, so it is registered here for both languages.
//
// NOTE: the key 'qlbael-sidelink' is misspelled, but the sidebar link looks the message up
// under exactly this name, so the spelling is kept on purpose.
if (mw.config.get('wgUserLanguage') === 'he') {
	mw.messages.set({
		'qlabel-adder-title' : 'הוספת תוויות',
		'qlabel-save-continue' : 'שמירה והמשך',
		'qlabel-skip' : 'דילוג',
		'qlbael-sidelink': 'הוספת תוויות',
		'qlabel-invoke': 'הוספת תוויות חסרות בוויקינתונים בעזרת התבנית',
		'qlabel-save-failed': 'השמירה נכשלה ;( $1',
		'qlabel-save-success': 'השמירה הצליחה',
		'qlabel-no-results': 'לא נמצאו דפים מתאימים',
		'qlabel-no-parameter-results': 'לא קיים מיפוי לפרמטר בשם $1',
		'qlabel-no-templatedata': 'לא קיימת מפת wikidata בתבנית',
		'qlabel-request-paramname': 'שם הפרמטר בתבנית',
		'qlabel-locallabel-placeholder': 'תווית מתורגמת',
		'qlabel-fields-title': 'הוספת תוויות',
		'qlabel-english-label': 'תווית באנגלית',
		'qlabel-local-label': 'תווית בעברית',
		'qlabel-local-source': 'מבוסס על',
		'qlabel-infobox-missing-mapping': 'חסר מיפוי של התבנית ליישויות בוויקינתונים. נא להוסיף ב$1'
	});
} else {
	mw.messages.set({
		'qlabel-adder-title' : 'Add labels',
		'qlabel-save-continue' : 'Save & continue',
		'qlabel-skip' : 'Skip',
		'qlbael-sidelink': 'Add labels',
		'qlabel-invoke': 'Fill in missing Wikidata labels using this template',
		'qlabel-save-failed': 'Save failed ;( $1',
		'qlabel-save-success': 'Saved',
		'qlabel-no-results': 'No relevant pages found',
		'qlabel-no-parameter-results': 'There is no mapping for parameter $1',
		'qlabel-no-templatedata': 'There is no wikidata map in the template data',
		'qlabel-request-paramname': 'Parameter name',
		'qlabel-locallabel-placeholder': 'Translated label',
		'qlabel-fields-title': 'Add labels',
		'qlabel-english-label': 'English label',
		'qlabel-local-label': 'Local label',
		'qlabel-local-source': 'Based on',
		'qlabel-infobox-missing-mapping': 'There is no mapping between the template and entities in Wikidata. Please add it in $1'
	});
}

/**
 * Process dialog that steps through Wikidata entities lacking a local label and lets the
 * user save or skip each one.
 *
 * All bookkeeping fields are initialised before the parent constructor runs.
 *
 * @param {Object} config Configuration forwarded unchanged to the parent constructor
 */
function HarvestLabelDialog( config ) {
	var repoApi = new wikibase.api.RepoApi( wikibase.client.getMwApiForRepo() );

	// Position of the label currently shown: index into the title list and index into
	// that title's entity list. -1 means "not started yet".
	this.pageI = -1;
	this.pageClaimI = -1;

	// Paging of the wikitext downloads used to compute suggestions.
	this.batchSize = 20;
	this.currentBatch = 0;
	this.extractorRgx = null;
	this.suggestedTranslateVals = {};

	// Work list and progress markers.
	this.titlesToEntities = {};
	this.translatedEntities = {};
	this.currentEntity = null;
	this.phase = 'label-with-suggestions';

	// Client used to write labels to the repository.
	this.wikidataApi = repoApi;

	HarvestLabelDialog.super.call( this, config );
}
// HarvestLabelDialog extends OO.ui.ProcessDialog.
OO.inheritClass( HarvestLabelDialog, OO.ui.ProcessDialog );

// Symbolic name used when the dialog is registered with a window manager (.addWindows()).
HarvestLabelDialog.static.name = 'HarvestLabelDialog';
// Title shown in the dialog header.
HarvestLabelDialog.static.title = mw.msg('qlabel-adder-title');

// Buttons of the dialog. 'saveContinue' and 'skipOne' are handled in getActionProcess();
// the last button carries no action name and is left to the parent class.
// The cancel label uses OOUI's own localized message instead of a hard-coded English string,
// so it follows the interface language like the other buttons.
HarvestLabelDialog.static.actions = [
  { action: 'saveContinue', label: mw.msg('qlabel-save-continue'), flags: [ 'other', 'constructive' ] },
  { action: 'skipOne', label: mw.msg('qlabel-skip'), flags: [ 'other', 'progressive' ] },
  { label: OO.ui.deferMsg( 'ooui-dialog-message-reject' ), flags: 'safe' }
];


/**
 * Build the dialog body: a padded panel holding one fieldset with three rows -
 * the source (English) label, the editable local label, and the page the
 * suggestion is based on.
 */
HarvestLabelDialog.prototype.initialize = function () {
	var dialog = this;

	// Wrap a widget in a field row whose caption is the given interface message.
	function captionedRow( widget, messageKey ) {
		return new OO.ui.FieldLayout( widget, { label: mw.msg( messageKey ) } );
	}

	HarvestLabelDialog.super.prototype.initialize.call( dialog );

	dialog.content = new OO.ui.PanelLayout( { padded: true, expanded: true } );

	// Widgets are kept on the instance so other methods can update them.
	dialog.sourceLabel = new OO.ui.LabelWidget( { label: '' } );
	dialog.localLabel = new OO.ui.TextInputWidget( {
		placeholder: mw.msg( 'qlabel-locallabel-placeholder' )
	} );
	dialog.localSource = new OO.ui.LabelWidget( { label: '' } );

	dialog.fieldset = new OO.ui.FieldsetLayout( {
		label: mw.msg( 'qlabel-fields-title' ),
		classes: [ 'container' ]
	} );
	dialog.fieldset.addItems( [
		captionedRow( dialog.sourceLabel, 'qlabel-english-label' ),
		captionedRow( dialog.localLabel, 'qlabel-local-label' ),
		captionedRow( dialog.localSource, 'qlabel-local-source' )
	] );

	dialog.content.$element.append( dialog.fieldset.$element );
	dialog.$body.append( dialog.content.$element );
};
  
/**
 * Report the height of the dialog body.
 *
 * @return {number} Always 400
 */
HarvestLabelDialog.prototype.getBodyHeight = function () {
  var fixedBodyHeight = 400;
  return fixedBodyHeight;
};


/**
 * Download the wikitext of the next batch of local pages and extract, for each page,
 * the value of the requested template parameter as a label suggestion.
 *
 * Suggestions are stored in this.suggestedTranslateVals keyed by page title, and
 * this.currentBatch is advanced by this.batchSize once the batch has been handled.
 *
 * The returned promise is always resolved, even when the API request fails: a failed
 * batch simply yields no suggestions, and advancing the batch pointer keeps callers
 * that wait on the promise from stalling or re-requesting the same batch forever.
 *
 * @return {jQuery.Promise} Resolved when the batch has been processed
 */
HarvestLabelDialog.prototype.suggestLabels = function () {
	var self = this,
		dfd = $.Deferred(),
		batchTitles;

	// Nothing left to fetch. (">=" rather than ">": an empty slice must not be sent to the API.)
	if ( this.currentBatch >= this.localTitles.length ) {
		return dfd.resolve().promise();
	}
	batchTitles = this.localTitles.slice( this.currentBatch, this.currentBatch + this.batchSize );

	function finishBatch() {
		self.currentBatch += self.batchSize;
		dfd.resolve();
	}

	new mw.Api().get( {
		action: 'query',
		prop: 'revisions',
		titles: batchTitles.join( '|' ),
		rvprop: 'content',
		indexpageids: 1
	} ).done( function ( d ) {
		var query = ( d && d.query ) || {},
			pageids = query.pageids || [],
			pages = query.pages || {},
			i, pageid, page, pageText, templateVal;

		for ( i = 0; i < pageids.length; i++ ) {
			pageid = pageids[ i ];
			page = pages[ pageid ];
			// Negative ids denote missing/invalid titles; also skip pages without content.
			if ( pageid < 0 || !page || !page.revisions || !page.revisions.length ) {
				continue;
			}
			pageText = page.revisions[ 0 ][ '*' ];
			if ( typeof pageText !== 'string' ) {
				continue;
			}
			templateVal = self.extractorRgx.exec( pageText );
			if ( !templateVal || templateVal[ 1 ].trim().length === 0 ) {
				continue;
			}
			// Flatten newlines, unwrap plain [[links]] and turn the line-break template into a comma.
			templateVal = templateVal[ 1 ].replace( /\n/g, ' ' ).replace( /\[\[([^|\]]+?)\]\]/g, '$1' ).replace( /{{ש}}/g, ', ' ).trim();
			// The capture ran into the next parameter, i.e. this parameter is effectively empty.
			if ( /\|.+= *$/.test( templateVal ) ) {
				continue;
			}
			self.suggestedTranslateVals[ page.title ] = templateVal;
		}
		finishBatch();
	} ).fail( finishBatch );

	return dfd.promise();
};

/**
 * Set the dialog up from the opening data, build the regular expression that extracts
 * the parameter value from page wikitext, fetch the first batch of suggestions and then
 * show the first label.
 *
 * @param {Object} [data] Opening data
 * @param {string} data.templateParamName Name of the template parameter to harvest
 * @param {string[]} data.localTitles Titles of the local pages to go over
 * @param {Object} data.titlesToEntities Map from page title to a list of
 *  { entity, entityLabel } objects
 * @return {OO.ui.Process} Setup process
 */
HarvestLabelDialog.prototype.getSetupProcess = function ( data ) {
	data = data || {};
	return HarvestLabelDialog.super.prototype.getSetupProcess.call( this, data )
		.next( function () {
			var self = this,
				// The parameter name is free text typed by the user: escape regex metacharacters.
				escapedParamName = String( data.templateParamName ).replace( /[.*+?^${}()|[\]\\]/g, '\\$&' );

			this.templateParamName = data.templateParamName;
			this.localTitles = data.localTitles || [];
			this.titlesToEntities = data.titlesToEntities || {};

			// "|<param> = <value>" where the value ends before the next "|name=" or the closing "}}".
			// Backslashes are doubled because this is a string literal: a single "\s" would
			// collapse to a plain "s" and swallow leading "s" characters of the value.
			// The leading "\|" keeps e.g. "name" from matching inside "surname".
			this.extractorRgx = new RegExp( '\\|\\s*' + escapedParamName + '\\s*=\\s*([^=]+)(?=\\|.+=|\\}\\})' );

			this.suggestLabels().done( function () {
				self.nextLabel();
			} );
		}, this );
};

/**
 * Map a dialog action to the process that carries it out.
 *
 * 'skipOne' moves on to the next label, 'saveContinue' saves the current one;
 * any other action is delegated to the parent class.
 *
 * @param {string} action Symbolic action name
 * @return {OO.ui.Process} Process for the action
 */
HarvestLabelDialog.prototype.getActionProcess = function ( action ) {
	var self = this;

	if ( action === 'skipOne' ) {
		return new OO.ui.Process( function () {
			self.nextLabel();
		}, this );
	}

	if ( action === 'saveContinue' ) {
		return new OO.ui.Process( function () {
			self.saveLabel();
		}, this );
	}

	// Anything else is handled by the parent class.
	return HarvestLabelDialog.super.prototype.getActionProcess.call( this, action );
};

/**
 * Save the text in the label input as the label of the current entity (in the wiki's
 * content language) and move on to the next label without waiting for the response.
 *
 * An empty input is treated like a skip: nothing is sent and the entity is not marked
 * as handled. The outcome of the save is reported through a notification.
 */
HarvestLabelDialog.prototype.saveLabel = function ( ) {
	var entityId = this.currentEntity,
		label = this.localLabel.getValue().trim();

	// Report a failed save; "reason" is whatever description of the failure is available.
	function notifyFailure( reason ) {
		mw.notify( mw.msg( 'qlabel-save-failed', reason ) );
	}

	if ( entityId && label ) {
		this.translatedEntities[ entityId ] = 1; // mark as used so it is not offered again
		this.wikidataApi.setLabel( entityId, 0, label, mw.config.get( 'wgContentLanguage' ) )
			.done( function ( d ) {
				if ( d && d.success ) {
					mw.notify( mw.msg( 'qlabel-save-success' ) );
				} else if ( d && d.error ) {
					notifyFailure( d.error.info );
				}
			} )
			.fail( function ( code, result ) {
				// Rejections are expected to carry an error code and, usually, the API result.
				var info = result && result.error && result.error.info;
				notifyFailure( info || String( code ) );
			} );
	}
	this.nextLabel();
};

/**
 * Advance to the next entity that still needs a label and show it in the dialog.
 *
 * The work list is walked twice:
 *  1. phase 'label-with-suggestions' - only pages for which a suggestion was extracted
 *     are shown, with the suggestion pre-filled;
 *  2. phase 'label-without-suggestions' - every remaining entity is shown with an
 *     empty input.
 * Entities already saved in this session are skipped. When the second pass is
 * exhausted the dialog closes.
 *
 * While in the first phase, the next batch of suggestions is requested shortly before
 * the cursor reaches the end of the downloaded pages; the method then returns and is
 * re-entered once the batch has been processed.
 */
HarvestLabelDialog.prototype.nextLabel = function ( ) {
	var self = this,
		hasOwn = Object.prototype.hasOwnProperty,
		title, entities, entityLabelData;

	// A loop (rather than self-recursion) so that long runs of skipped entries
	// cannot exhaust the call stack.
	for ( ;; ) {
		if ( this.phase === 'label-with-suggestions' &&
			this.currentBatch < this.pageI + 5 &&
			this.currentBatch < this.localTitles.length ) {
			this.suggestLabels().done( function () {
				self.nextLabel();
			} );
			return;
		}

		// Move the cursor: first call, next entity of the same page, or next page.
		if ( this.pageI === -1 ) {
			this.pageI = 0;
			this.pageClaimI = 0;
		} else {
			entities = this.titlesToEntities[ this.localTitles[ this.pageI ] ] || [];
			if ( this.pageClaimI + 1 < entities.length ) {
				this.pageClaimI++;
			} else {
				this.pageI++;
				this.pageClaimI = 0;
			}
		}

		if ( this.pageI === this.localTitles.length ) {
			if ( this.phase === 'label-without-suggestions' ) {
				this.close();
				return;
			}
			// First pass done: start over, this time without requiring a suggestion.
			this.phase = 'label-without-suggestions';
			this.pageI = -1;
			this.pageClaimI = 0;
			continue;
		}

		title = this.localTitles[ this.pageI ];
		entityLabelData = ( this.titlesToEntities[ title ] || [] )[ this.pageClaimI ];
		if ( !entityLabelData ) {
			continue; // title without entities - nothing to show
		}
		this.currentEntity = entityLabelData.entity;

		if ( this.translatedEntities[ this.currentEntity ] ) {
			continue; // already saved during this session
		}
		if ( this.phase === 'label-with-suggestions' && !hasOwn.call( this.suggestedTranslateVals, title ) ) {
			continue; // no suggestion for this page - handled in the second pass
		}
		break;
	}

	this.localLabel.setValue( this.phase === 'label-with-suggestions' ? this.suggestedTranslateVals[ title ] : '' );

	// Page titles and Wikidata labels are user-editable text: build the links through
	// the DOM API so that they are never interpreted as HTML.
	this.localSource.setLabel(
		$( '<a>' ).attr( { href: mw.util.getUrl( title ), target: '_blank', rel: 'noopener' } ).text( title )
	);
	this.sourceLabel.setLabel(
		$( '<a>' ).attr( {
			href: '//www.wikidata.org/wiki/' + encodeURIComponent( entityLabelData.entity ),
			target: '_blank',
			rel: 'noopener'
		} ).text( entityLabelData.entityLabel )
	);
};

/**
 * Create a HarvestLabelDialog together with its own window manager, attach the manager
 * to the document body and open the dialog with the given work list.
 *
 * @param {string} templateParam Name of the template parameter to harvest
 * @param {string[]} localTitles Titles of the local pages to go over
 * @param {Object} titlesToEntities Map from page title to the entities needing a label
 */
function createHarvestLabelDialog(templateParam, localTitles, titlesToEntities) {
	var dialog = new HarvestLabelDialog( { size: 'medium' } ),
		manager = new OO.ui.WindowManager(),
		openingData = {
			templateParamName: templateParam,
			localTitles: localTitles,
			titlesToEntities: titlesToEntities
		};

	// The manager must be in the document before it can show windows.
	$( 'body' ).append( manager.$element );

	manager.addWindows( [ dialog ] );
	manager.openWindow( dialog, openingData );
}

/**
 * Run a SPARQL query against the Wikidata query service.
 *
 * The query text is percent-encoded before it is placed in the URL, so characters that
 * are significant in URLs (such as "&", "#", "+" or "%") reach the service unchanged.
 *
 * @param {string} query Raw (unencoded) SPARQL query
 * @return {jqXHR} Request object of the JSON call, as returned by $.getJSON
 */
function runSparql(query) {
	return $.getJSON('https://query.wikidata.org/sparql?format=json&query=' + encodeURIComponent(query));
}

/**
 * Build and run the SPARQL query that lists entities lacking a label in the wiki's
 * content language.
 *
 * The query selects ?item, ?sitelink, ?linkedprop and ?enlabel where:
 *  - ?item has the value wd:<entityVal> along the path built from entityProp (each
 *    "/"-separated segment is prefixed with "wdt:");
 *  - ?linkedprop is the value of a <claim> statement on ?item;
 *  - ?sitelink is about ?item and is in the content language (wgContentLanguage);
 *  - rows whose ?linkedprop already has a label in that language are removed (MINUS);
 *  - ?enlabel is the English label of ?linkedprop when one exists (OPTIONAL).
 *
 * The three arguments are concatenated into the query text as they are.
 *
 * @param {string} entityProp Property id, or "/"-separated ids, e.g. "P31"
 * @param {string} entityVal Entity id the property path must lead to, e.g. "Q5"
 * @param {string} claim Property id whose values are checked for a missing label
 * @return {*} Whatever runSparql() returns for the query
 */
function queryMissingLabels(entityProp, entityVal, claim)
{
	var query = "SELECT ?item ?sitelink ?linkedprop ?enlabel WHERE { \
 ?item wdt:" + entityProp.split('/').join('/wdt:') + " wd:"+entityVal+" .\
 ?item p:"+claim+" ?statement .\
 ?statement ps:"+claim+" ?linkedprop .\
 ?sitelink schema:about ?item .\
 ?sitelink schema:inLanguage '"+mw.config.get('wgContentLanguage')+"' \
 MINUS {?linkedprop rdfs:label ?locallabel filter(lang(?locallabel) = '"+mw.config.get('wgContentLanguage')+"')}\
 OPTIONAL {?linkedprop rdfs:label ?enlabel filter(lang(?enlabel) = 'en')}\
 SERVICE wikibase:label { bd:serviceParam wikibase:language '"+mw.config.get('wgContentLanguage')+"'	}  \
}";
	return runSparql(query);
}

/**
 * Look up, on the mapping page (INFOBOX_TO_ENTITIES_MAP_PAGE), the Wikidata
 * property/value pair registered for the template currently being viewed.
 *
 * Every "{{...|<property>|<value>|<template title>}}" row found in the page's wikitext
 * is read. Titles are compared after trimming and with spaces turned into underscores,
 * so a row written with spaces still matches wgPageName (which uses underscores).
 *
 * When the page cannot be read or holds no row for the current template, the user is
 * told where to add the mapping and the promise is rejected.
 *
 * @return {jQuery.Promise} Resolved with [ property, value ], or rejected
 */
function getTemplateToWikidataMap() {
	var dfd = new jQuery.Deferred();

	// Bring a title into the underscore form used by wgPageName.
	function normalizeTitle( title ) {
		return String( title ).trim().replace( / /g, '_' );
	}

	function rejectWithNotice() {
		alert(mw.msg('qlabel-infobox-missing-mapping', INFOBOX_TO_ENTITIES_MAP_PAGE));
		dfd.reject();
	}

	new mw.Api().get({ action:'parse', page: INFOBOX_TO_ENTITIES_MAP_PAGE, prop: 'wikitext' }).done(function(d) {
		var text = ( d && d.parse && d.parse.wikitext && d.parse.wikitext['*'] ) || '',
			extractor = /\{\{.+?\|(.+?)\|(.+?)\|(.+?)\}\}/g,
			templateToWikidata = {},
			pageKey = normalizeTitle( mw.config.get('wgPageName') ),
			m;

		while ( ( m = extractor.exec( text ) ) !== null ) {
			templateToWikidata[ normalizeTitle( m[3] ) ] = [ m[1].trim(), m[2].trim() ];
		}
		if ( Object.prototype.hasOwnProperty.call( templateToWikidata, pageKey ) ) {
			dfd.resolve( templateToWikidata[ pageKey ] );
			return;
		}
		rejectWithNotice();
	}).fail( rejectWithNotice ); // e.g. the mapping page does not exist on this wiki

	return dfd.promise();
}

/**
 * Find the Wikidata property that the current template's TemplateData maps to the
 * given template parameter.
 *
 * The "wikidata" map of the template's TemplateData is scanned; a property matches
 * when its mapped value is the parameter name, or an array (possibly nested) that
 * contains it.
 *
 * The user is alerted and the promise is rejected when the template has no "wikidata"
 * map, when no property is mapped to the parameter, or when the request fails.
 *
 * @param {string} paramName Template parameter name
 * @return {jQuery.Promise} Resolved with the property id (e.g. "P50"), or rejected
 */
function findParamProperty(paramName) {
	var dfd = new jQuery.Deferred(),
		hasOwn = Object.prototype.hasOwnProperty;

	// True when a map value names the parameter, directly or inside (nested) arrays.
	function mapsToParam( mapped ) {
		var i;
		if ( Array.isArray( mapped ) ) {
			for ( i = 0; i < mapped.length; i++ ) {
				if ( mapsToParam( mapped[ i ] ) ) {
					return true;
				}
			}
			return false;
		}
		return mapped !== null && mapped !== undefined && String( mapped ) === String( paramName );
	}

	function rejectWithNotice( message ) {
		alert( message );
		dfd.reject();
	}

	new mw.Api().get({
		action: 'templatedata',
		titles: mw.config.get('wgPageName'),
		redirects: 1
	}).done(function(data) {
		var pages = ( data && data.pages ) || {},
			templatedata = {},
			wikidataMap, pageid, wikidataProp;

		// A single title was requested; keep the (last) page entry returned.
		for ( pageid in pages ) {
			if ( hasOwn.call( pages, pageid ) ) {
				templatedata = pages[ pageid ];
			}
		}

		wikidataMap = templatedata.maps && templatedata.maps.wikidata;
		if ( !wikidataMap ) {
			rejectWithNotice( mw.msg('qlabel-no-templatedata') );
			return;
		}

		for ( wikidataProp in wikidataMap ) {
			if ( hasOwn.call( wikidataMap, wikidataProp ) && mapsToParam( wikidataMap[ wikidataProp ] ) ) {
				dfd.resolve( wikidataProp );
				return;
			}
		}

		rejectWithNotice( mw.msg('qlabel-no-parameter-results', paramName) );
	}).fail(function() {
		// No dedicated message exists for a failed request; this is the closest one.
		rejectWithNotice( mw.msg('qlabel-no-templatedata') );
	});

	return dfd.promise();
}

/**
 * Add the "add labels" link to the toolbox and wire it up.
 *
 * Clicking the link asks for a template parameter name, resolves the Wikidata property
 * mapped to it and the property/value pair registered for the template, queries
 * Wikidata for entities that lack a local label, and opens the harvesting dialog with
 * the pages and entities found.
 *
 * @return {jQuery} The created sidebar link
 */
function createaddLabelsButton(){
	var hasOwn = Object.prototype.hasOwnProperty,
		addLabelWizard = $(mw.util.addPortletLink(
			'p-tb',
			'#',
			mw.msg('qlbael-sidelink'),
			't-harvestlabel',
			mw.msg('qlabel-invoke'),
			null,
			'#t-whatlinkshere'
		));

	// Turn SPARQL result rows into an ordered list of distinct page titles and a map
	// from each title to the distinct entities that need a label.
	function groupBindingsByTitle( bindings ) {
		var localTitles = [],
			titlesToEntities = {},
			seen = {},
			i, binding, title, linkedEntity, enLabel;

		for ( i = 0; i < bindings.length; i++ ) {
			binding = bindings[ i ];
			linkedEntity = binding.linkedprop.value.split( '/entity/' )[ 1 ];
			if ( !linkedEntity ) {
				continue; // the value is not an entity, so it has no label to fill
			}
			title = decodeURIComponent( binding.sitelink.value.split( '/wiki/' )[ 1 ] );
			// Underscores to spaces, keeping the namespace, as the API reports titles.
			title = new mw.Title( title ).getPrefixedText();
			enLabel = ( binding.enlabel && binding.enlabel.value ) ? binding.enlabel.value : linkedEntity;

			if ( !hasOwn.call( titlesToEntities, title ) ) {
				titlesToEntities[ title ] = [];
				seen[ title ] = {};
				localTitles.push( title ); // each title once, however many rows mention it
			}
			if ( !hasOwn.call( seen[ title ], linkedEntity ) ) {
				seen[ title ][ linkedEntity ] = true;
				titlesToEntities[ title ].push( { 'entity': linkedEntity, 'entityLabel': enLabel } );
			}
		}
		return { localTitles: localTitles, titlesToEntities: titlesToEntities };
	}

	addLabelWizard.on( 'click', function ( e ) {
		var templateParam;

		// Always cancel the navigation to "#", also when the prompt is dismissed.
		e.preventDefault();

		templateParam = prompt( mw.msg( 'qlabel-request-paramname' ) );
		if ( !templateParam ) {
			return;
		}

		// Failures of the two lookups are reported to the user by the lookups themselves.
		$.when( findParamProperty( templateParam ), getTemplateToWikidataMap() ).done( function ( wikidataProp, wikidataMap ) {
			queryMissingLabels( wikidataMap[ 0 ], wikidataMap[ 1 ], wikidataProp ).done( function ( d ) {
				var grouped = groupBindingsByTitle( ( d && d.results && d.results.bindings ) || [] );

				if ( grouped.localTitles.length === 0 ) {
					mw.notify( mw.msg( 'qlabel-no-results' ) );
					return;
				}
				createHarvestLabelDialog( templateParam, grouped.localTitles, grouped.titlesToEntities );
			} ).fail( function () {
				// No dedicated message exists for a failed query; this is the closest one.
				mw.notify( mw.msg( 'qlabel-no-results' ) );
			} );
		} );
	} );
	return addLabelWizard;
}

createaddLabelsButton();
});