Research talk:Revision scoring as a service/Work log/2015-05-16

From Meta, a Wikimedia project coordination wiki
Jump to navigation Jump to search

Saturday, May 16, 2015[edit]

Today I am filling in automatically generated labels for privileged users.

Here's the query I am using to get the priv. user edits:

SELECT DISTINCT "enwiki" AS wiki, rev_id
FROM staging.rev_ids_20k_sample
INNER JOIN enwiki.revision USING(rev_id)
INNER JOIN enwiki.user_groups ON ug_user = rev_user
WHERE wiki = "enwiki" AND
      ug_group IN ("oversight", "bot", "rollbacker", "checkuser", "abusefilter", "reviewer",
                   "sysop", "templateeditor", "autoreviewer", "ipblock-exempt", "filemover",
                   "import", "accountcreator", "bureaucrat", "eliminator")
UNION ALL
SELECT DISTINCT "ptwiki" AS wiki, rev_id
FROM staging.rev_ids_20k_sample
INNER JOIN ptwiki.revision USING(rev_id)
INNER JOIN ptwiki.user_groups ON ug_user = rev_user
WHERE wiki = "ptwiki" AND
      ug_group IN ("bot", "autoreviewer", "sysop", "bureaucrat", "rollbacker", "checkuser",
                   "eliminator", "oversight")
UNION ALL
SELECT DISTINCT "trwiki" AS wiki, rev_id
FROM staging.rev_ids_20k_sample
INNER JOIN trwiki.revision USING(rev_id)
INNER JOIN trwiki.user_groups ON ug_user = rev_user
WHERE wiki = "trwiki" AND
      ug_group IN ("bot", "autoreviewer", "sysop", "bureaucrat", "rollbacker", "checkuser",
                   "eliminator", "oversight")
UNION ALL
SELECT DISTINCT "fawiki" AS wiki, rev_id
FROM staging.rev_ids_20k_sample
INNER JOIN trwiki.revision USING(rev_id)
INNER JOIN trwiki.user_groups ON ug_user = rev_user
WHERE wiki = "fawiki" AND
      ug_group IN ("templateeditor", "OTRS-member", "sysop", "rollbacker", "bureaucrat",
                   "Image-reviewer", "autopatrol", "patroller", "botadmin", "abusefilter",
                   "bot", "eliminator");

Now to import the data to the labels DB and perform the auto-labeling. I'll be using my test user User:Wiki labeler to do the work.

wikilabels=> CREATE TEMPORARY TABLE rev_ids_to_filter (rev_id INT, wiki VARCHAR(50));
CREATE TABLE
wikilabels=> \copy rev_ids_to_filter FROM './rev_ids.20k_sample.filtered.tsv';
wikilabels=> select count(*) FROM rev_ids_to_filter;
 count 
-------
 43078
(1 row)

wikilabels=> select * from rev_ids_to_filter limit 2;
  rev_id   |  wiki  
-----------+--------
 604245449 | enwiki
 604251853 | enwiki
(2 rows)

wikilabels=> \d workset
                                      Table "public.workset"
   Column    |            Type             |                      Modifiers                       
-------------+-----------------------------+------------------------------------------------------
 id          | integer                     | not null default nextval('workset_id_seq'::regclass)
 campaign_id | integer                     | 
 user_id     | integer                     | 
 created     | timestamp without time zone | 
 expires     | timestamp without time zone | 
Indexes:
    "workset_pkey" PRIMARY KEY, btree (id)
    "workset_user" btree (user_id)

wikilabels=> select id, wiki, name from campaign where active;
 id |  wiki  |                         name                          
----+--------+-------------------------------------------------------
  4 | enwiki | Edit quality (20k random sample, 2015)
  8 | azwiki | Edit quality (20k random sample, 2015)
  5 | trwiki | Değişiklik kalitesi (20,000 rastgele örnekleme, 2015)
  6 | fawiki | کیفیت ویرایش (نمونه تصادفی ۲۰ هزارتایی، ۲۰۱۵)
  7 | ptwiki | Qualidade das edições (amostra de 20k revisões, 2015)
(5 rows)


wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (4, 41948920, NOW(), NOW()) RETURNING id;
 id 
----
 83
(1 row)

INSERT 0 1


wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (5, 41948920, NOW(), NOW()) RETURNING id;
 id 
----
 84
(1 row)

INSERT 0 1
wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (6, 41948920, NOW(), NOW()) RETURNING id;
 id 
----
 85
(1 row)

INSERT 0 1
wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (7, 41948920, NOW(), NOW()) RETURNING id;
 id 
----
 86
(1 row)

INSERT 0 1

wikilabels=> \d workset_task
   Table "public.workset_task"
   Column   |  Type   | Modifiers 
------------+---------+-----------
 workset_id | integer | not null
 task_id    | integer | not null
Indexes:
    "workset_task_pkey" PRIMARY KEY, btree (workset_id, task_id)
Foreign-key constraints:
    "workset_task_task_id_fkey" FOREIGN KEY (task_id) REFERENCES task(id)


wikilabels=> SELECT 83, task.id FROM task WHERE campaign_id = 4 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'enwiki' LIMIT 10);
 ?column? |   id   
----------+--------
       83 | 180109
       83 | 184846
       83 | 186126
       83 | 190804
       83 | 192186
       83 | 192371
       83 | 193597
       83 | 195389
       83 | 198193
       83 | 198394
(10 rows)

wikilabels=> INSERT INTO workset_task SELECT 83, task.id FROM task WHERE campaign_id = 4 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'enwiki');
INSERT 0 9557

wikilabels=> INSERT INTO workset_task SELECT 84, task.id FROM task WHERE campaign_id = 5 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'trwiki');
INSERT 0 5470
wikilabels=> INSERT INTO workset_task SELECT 85, task.id FROM task WHERE campaign_id = 6 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'fawiki');
INSERT 0 17485
wikilabels=> INSERT INTO workset_task SELECT 86, task.id FROM task WHERE campaign_id = 7 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'ptwiki');
INSERT 0 10566
wikilabels=> \d label
                Table "public.label"
  Column   |            Type             | Modifiers 
-----------+-----------------------------+-----------
 task_id   | integer                     | not null
 user_id   | integer                     | not null
 timestamp | timestamp without time zone | 
 data      | json                        | 
Indexes:
    "label_pkey" PRIMARY KEY, btree (task_id, user_id)
    "label_user" btree (user_id)
                                                             ^

wikilabels=> SELECT task_id, user_id, NOW(), '{"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}'::json FROM workset INNER JOIN workset_task ON workset.id = workset_id WHERE workset.id IN (83, 84, 85, 86) limit 2;
 task_id | user_id  |              now              |                                json                                
---------+----------+-------------------------------+--------------------------------------------------------------------
  180077 | 41948920 | 2015-05-16 16:16:31.889927+00 | {"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}
  180079 | 41948920 | 2015-05-16 16:16:31.889927+00 | {"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}
(2 rows)

wikilabels=> SELECT COUNT(*) FROM (SELECT task_id, user_id, NOW(), '{"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}'::json FROM workset INNER JOIN workset_task ON workset.id = workset_id WHERE workset.id IN (83, 84, 85, 86)) AS foo;
 count 
-------
 43078
(1 row)

wikilabels=> INSERT INTO label SELECT task_id, user_id, NOW(), '{"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}'::json FROM workset INNER JOIN workset_task ON workset.id = workset_id WHERE workset.id IN (83, 84, 85, 86);
INSERT 0 43078

Yes check.svg Done --EpochFail (talk) 16:22, 16 May 2015 (UTC)