Research:Top edited English Wikipedia articles in 2016
Duration: 2016-12 – 2016-12
This page documents a completed research project.
The comms team reached out for a summary of the most popular articles by edits in 2016. In this project, we'll list out those pages and state their edit counts. See [1] for past analyses.
Methods
editBasically, we're counting up the number of revisions that appear in the revision table for these pages. Since joining page to revision is slow, we'll aggregate revision before joining to page. This will give as a variable top N results that are actually articles from which we can take the top N that we want.
overall sql query
|
---|
SELECT page_namespace, page_title, edits
FROM (
SELECT rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "20151210" AND "20161210"
GROUP BY rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
ORDER BY page_namespace, edits DESC;
|
monthly sql query
|
---|
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201601" AND "201602"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201602" AND "201603"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201603" AND "201604"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201604" AND "201605"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201605" AND "201606"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201606" AND "201607"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201607" AND "201608"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201608" AND "201609"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201609" AND "201610"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201610" AND "201611"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201611" AND "201612"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20)
UNION ALL
(SELECT month, page_namespace, page_title, edits
FROM (
SELECT LEFT(rev_timestamp, 6) AS month, rev_page, COUNT(*) AS edits
FROM revision
WHERE rev_timestamp BETWEEN "201612" AND "201701"
GROUP BY month, rev_page
ORDER BY edits DESC
LIMIT 500
) AS top_page_edits
INNER JOIN page ON rev_page = page_id
WHERE page_namespace = 0
ORDER BY page_namespace, edits DESC
LIMIT 20);
|
Result
editResults from the query were manually filtered to only include namespace 0 -- the article namespace.