Research talk:Newcomer task suggestions/Work log/2014-09-29

Add topic
From Meta, a Wikimedia project coordination wiki

Monday, September 29th[edit]

Today I did most of my work with User:Nettrom here: http://etherpad.wikimedia.org/p/task_recommendations_flow Here's the important bits:

We settled on three questions:

  1. At what rate do newcomers accept recommendations (click)?
  2. At what rate do newcomers edit articles they accept?
  3. How do the types of edits made in each condition differ?
    • Compare proportion of main namespace edits between conditions.
    • Compare the rate of section edits between conditions (parse comment).
    • Compare the # of bytes changed between conditions (rev_len - parent.rev_len).
    • Compare the length of pages at at the time edit between conditions (parent.rev_len).

I built SQL for the first two questions.

user_recommendation_stats
CREATE TEMPORARY TABLE staging.user_set
SELECT DISTINCT wiki, event_userId, event_setId
FROM TaskRecommendationImpression_9266226;

CREATE TEMPORARY TABLE staging.recommendations_seen
SELECT
    wiki,
    event_userId as user_id,
    SUM(recommendations_seen) AS recommendations_seen
FROM (
    SELECT
        wiki,
        event_userId,
        event_setId,
        MAX(impression.event_offset) + 3 AS recommendations_seen
    FROM ServerSideAccountCreation_5487345 ssac
    INNER JOIN TaskRecommendationImpression_9266226 impression USING
            (wiki, event_userId)
    WHERE
        impression.timestamp BETWEEN
            ssac.timestamp AND
            DATE_FORMAT(DATE_ADD(ssac.timestamp, INTERVAL 7 DAY),
                        "%Y%m%d%H%i%S")
    GROUP BY wiki, event_userId, event_setId
) AS recommendation_sets
GROUP BY wiki, user_id;

CREATE TEMPORARY TABLE staging.recommendations_accepted
SELECT
    wiki,
    event_userId AS user_id,
    COUNT(click.id) AS recommendations_accepted
FROM staging.user_set
INNER JOIN ServerSideAccountCreation_5487345 ssac USING (wiki, event_userId)
INNER JOIN TaskRecommendationClick_9266317 click USING (wiki, event_setId)
WHERE
    click.timestamp BETWEEN
        ssac.timestamp AND
        DATE_FORMAT(DATE_ADD(ssac.timestamp, INTERVAL 7 DAY), "%Y%m%d%H%i%S")
GROUP BY wiki, user_id;

SELECT
    wiki,
    user_id,
    IFNULL(recommendations_seen, 0) AS recommendations_seen,
    IFNULL(recommendations_accepted, 0) AS recommendations_accepted
FROM staging.tr_experimental_user
LEFT JOIN staging.recommendations_seen USING (wiki, user_id)
LEFT JOIN staging.recommendations_accepted USING (wiki, user_id);
user_edit_stats
SELECT
    wiki,
    user_id,
    IFNULL(accepted_edits, 0) AS accepted_edits
FROM staging.tr_experimental_user
LEFT JOIN (
    SELECT
        DATABASE() AS wiki,
        event_userId AS user_id,
        COUNT(*) AS accepted_edits
    FROM (
        SELECT DISTINCT wiki, event_userId, event_setId
        FROM log.TaskRecommendationImpression_9266226
        WHERE wiki = DATABASE()
    ) AS user_set
    INNER JOIN log.ServerSideAccountCreation_5487345 ssac USING
            (wiki, event_userId)
    INNER JOIN log.TaskRecommendationClick_9266317 click USING
            (event_setId)
    INNER JOIN revision ON
        rev_user = event_userId AND
        rev_page = click.event_pageId AND
        rev_timestamp >= click.timestamp
    WHERE
        ssac.wiki = DATABASE() AND
        rev_timestamp BETWEEN
            ssac.timestamp AND
            DATE_FORMAT(DATE_ADD(ssac.timestamp, INTERVAL 7 DAY),
                        "%Y%m%d%H%i%S")
    GROUP BY wiki, user_id
) AS accepted_edits USING (wiki, user_id);

Those datasets are loaded and ready to go for tomorrow. I think I'll need to dig into the types of pages edited in the AM. --Halfak (WMF) (talk) 23:10, 29 September 2014 (UTC)Reply