Jump to content

Research talk:Revision scoring as a service/Work log/2015-06-13

Add topic
From Meta, a Wikimedia project coordination wiki

Saturday, June 13, 2015

[edit]

Hey folks. Today I'm auto-labeling the edits of a bot in fawiki.

The bot's user_id is 374638.

$dbstore staging -e 'select rev_id from rev_ids_20k_sample INNER JOIN fawiki.revision USING (rev_id) WHERE wiki = "fawiki" AND rev_user = 374638;' > projects/labels/datasets/edit_quality/rev_ids.20k_sample.fawiki.bot.tsv

$ wc projects/labels/datasets/edit_quality/rev_ids.20k_sample.fawiki.bot.tsv
 357  357 3211 projects/labels/datasets/edit_quality/rev_ids.20k_sample.fawiki.bot.tsv

That'll get rid of 356 labels (the file has 357 lines, but the first one is the header row).

wikilabels=> CREATE TEMPORARY TABLE rev_ids_to_filter (rev_id INT);
CREATE TABLE
wikilabels=> \copy rev_ids_to_filter FROM 'datasets/rev_ids.20k_sample.fawiki.bot.tsv' WITH CSV HEADER;
wikilabels=> SELECT COUNT(*) FROM rev_ids_to_filter;
 count 
-------
   356
(1 row)

wikilabels=> SELECT id, wiki, name FROM campaign WHERE active;
 id |  wiki  |                         name                          
----+--------+-------------------------------------------------------
  4 | enwiki | Edit quality (20k random sample, 2015)
  8 | azwiki | Edit quality (20k random sample, 2015)
  5 | trwiki | Değişiklik kalitesi (20,000 rastgele örnekleme, 2015)
  6 | fawiki | کیفیت ویرایش (نمونه تصادفی ۲۰ هزارتایی، ۲۰۱۵)
  7 | ptwiki | Qualidade das edições (amostra de 20k revisões, 2015)
  9 | frwiki | Modifier la qualité (20k échantillon aléatoire, 2015)
(6 rows)

wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (6, 41948920, NOW(), NOW()) RETURNING id;
 id  
-----
 180
(1 row)

INSERT 0 1
wikilabels=> SELECT 180, task.id FROM task WHERE campaign_id = 6 AND (task.data->'rev_id')::text::INT IN (SELECT rev_id FROM rev_ids_to_filter LIMIT 10);
 ?column? |   id   
----------+--------
      180 | 143735
      180 | 144386
      180 | 145651
      180 | 146765
      180 | 147332
      180 | 150629
      180 | 150783
      180 | 151713
      180 | 152154
      180 | 159347
(10 rows)

wikilabels=> INSERT INTO workset_task SELECT 180, task.id FROM task WHERE campaign_id = 6 AND (task.data->'rev_id')::text::INT IN (SELECT rev_id FROM rev_ids_to_filter);
INSERT 0 356

wikilabels=> SELECT task_id, user_id, NOW(), '{"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}'::json FROM workset INNER JOIN workset_task ON workset.id = workset_id WHERE workset.id = 180 LIMIT 2;

wikilabels=> INSERT INTO label SELECT task_id, user_id, NOW(), '{"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}'::json FROM workset INNER JOIN workset_task ON workset.id = workset_id WHERE workset.id = 180;
INSERT 0 356

Done --EpochFail (talk) 15:23, 13 June 2015 (UTC)