Module:WMF user data requests

Module documentation
-- Generate graphs of requests for WMF private user data, from [[commons:Data:Requests for WMF user data.tab]]

-- Translate a lowercase three-letter English month abbreviation into its
-- month number.
-- @param monthName  abbreviation such as "jan" or "dec" (matched exactly)
-- @return the month number 1..12, or nil when the abbreviation is not known
function monthIndex( monthName )
	-- Position in this list is the month number.
	local abbreviations = {
		"jan", "feb", "mar", "apr", "may", "jun",
		"jul", "aug", "sep", "oct", "nov", "dec",
	}
	for number, abbreviation in ipairs( abbreviations ) do
		if abbreviation == monthName then
			return number
		end
	end
	return nil
end

-- Convert a compact reporting-period label into a pair of "YYYY-MM" strings.
--
-- The label is matched as <month><optional year><month><2-digit year>, for
-- example "jul12dec12" or "janjun13". When the first year is omitted, the
-- end year is used for both ends. Years are two digits and are placed in
-- the 2000s.
--
-- @param dateStr  period label; months are three-letter abbreviations
--                 understood by monthIndex
-- @return the first month of the period as "YYYY-MM"
-- @return the month following the last month of the period as "YYYY-MM"
--         (December rolls over to January of the next year)
-- Raises an error when the label does not match or a month is unknown.
function dateRangeFormat( dateStr )
	local startM, startY, endM, endY = string.match( dateStr, "(...)(%d?%d?)(...)(%d%d)" )
	if not startM then
		error( "dateRangeFormat: cannot parse date range '" .. tostring( dateStr ) .. "'" )
	end

	if startY == '' then
		startY = endY
	end

	-- Declared local so the helper does not leak into the global table
	-- every time dateRangeFormat is called.
	local function dateFormat( year, month, addOne )
		local mIndex = monthIndex( month )
		if not mIndex then
			error( "dateRangeFormat: unknown month '" .. tostring( month ) .. "' in '" .. tostring( dateStr ) .. "'" )
		end
		-- Work with a real number so the rollover below cannot drop the
		-- leading zero of years such as "08".
		local fullYear = 2000 + tonumber( year )
		if addOne then
			mIndex = mIndex + 1
			if mIndex == 13 then
				mIndex = 1
				fullYear = fullYear + 1
			end
		end
		return string.format( '%04d-%02d', fullYear, mIndex )
	end

	return dateFormat( startY, startM ), dateFormat( endY, endM, true )
end

-- Collect the request counts of one country as a JSON array for the graph.
--
-- Reads the tabular dataset 'Requests for WMF user data.tab' and, for every
-- row whose first column equals countryName, walks the count columns in
-- groups of three starting at column 6. Within a group the columns are
-- total, partially complied and complied; the "not complied" figure is
-- derived as total - partial - complied. The last column of a row is the
-- reporting-period label handed to dateRangeFormat.
--
-- Each emitted record has the fields:
--   reqType   zero-based index of the column group
--   complied  0 = not complied, 1 = partial compliance, 2 = complied
--   value     the count (only positive counts are emitted)
--   startDate, endDate  as returned by dateRangeFormat
--
-- @param countryName  value to match against the first column of each row
-- @return JSON text (via mw.text.jsonEncode) of the list of records
function getData( countryName )
	local dataset = mw.ext.data.get( 'Requests for WMF user data.tab' )
	if not dataset or not dataset.data then
		error( "getData: could not load 'Requests for WMF user data.tab'" )
	end
	local data = dataset.data
	local r = {}

	-- First count column; NOTE(review): columns 2..5 are not read here.
	local firstCol = 6

	for c = 1, #data do
		local row = data[ c ]
		if row[ 1 ] == countryName then
			-- Parse the period only for rows that are actually used, so a
			-- malformed label in another country's row cannot abort this call.
			local startDate, endDate = dateRangeFormat( row[ #row ] )
			for i = firstCol, #row - 1, 3 do
				for ii = 0, 2 do -- 0: not complied (derived), 1: partial, 2: complied
					local o = row[ i + ii ]
					-- The first column of the group is the total, so the other
					-- two are subtracted from it to get the "not complied" count.
					local j = ( ii == 0 ) and row[ i + 1 ] + row[ i + 2 ] or 0
					local value = o - j
					if value > 0 then
						r[ #r + 1 ] = {
							reqType = ( i - firstCol ) / 3,
							complied = ii,
							value = value,
							startDate = startDate, endDate = endDate
						}
					end
				end
			end
		end
	end
	return mw.text.jsonEncode( r )
end

-- Assemble the graph specification (JSON text) for one country.
--
-- The specification is a fixed template into which three pieces are spliced:
-- the JSON rows returned by getData, the request-type labels and the
-- compliance labels.
--
-- @param countryName  passed through to getData
-- @return the complete specification as a string
function generateGraph( countryName )

	-- Rows for this country, already encoded as JSON text.
	local valuesJson = getData( countryName )

	-- TODO: Make these translatable.
	-- Labels for reqType 0..5, in that order.
	local requestTypeLabels = [[
		[
			"Civil subpoena",
			"Criminal subpoena",
			"Informal government request",
			"Search warrant",
			"Court order",
			"Administrative subpoena"
		]
	]]
	-- Labels for complied 0..2, in that order.
	local complianceLabels = '["Not complied","Partial compliance","Complied"]'

	-- Template up to the point where the data rows are inserted.
	local specHead = [[
{
  "width": 300,
  "height": 150,
  "data": [
    {
      "name": "table",
      "values": ]]

	-- Remaining data definitions and the scales, up to the range of "typeLabels".
	local specAfterValues = [[,
      "format": {
        "type": "json",
        "parse": {"startDate": "date","endDate": "date"}
      }
    },
    {
      "name": "agg",
      "source": "table",
      "transform": [
        {
          "type": "aggregate",
          "groupby": ["startDate","endDate","reqType"],
          "summarize": [{"field": "value","ops": ["sum"]}]
        }
      ]
    }
  ],
  "scales": [
    {
      "name": "dateScale",
      "type": "time",
      "domain": {"data": "table","field": ["startDate","endDate"]},
      "range": "width"
    },
    {
      "name": "rType",
      "type": "ordinal",
      "domain": {"data": "table","field": "reqType"},
      "range": "height",
      "padding": 0.01
    },
    {
      "name": "color",
      "type": "ordinal",
      "domain": {"data": "table","field": "complied"},
      "range": "category10"
    },
    {
      "name": "typeLabels",
      "type": "ordinal",
      "domain": [0,1,2,3,4,5],
      "range": ]]

	-- The "cLabels" scale, up to its range.
	local specAfterTypeLabels = [[
    },
    {
      "name": "cLabels",
      "type": "ordinal",
      "domain": [0,1,2],
      "range": ]]

	-- Axes, marks and legend.
	local specTail = [[
    }
  ],
  "axes": [
    {
      "type": "x",
      "scale": "dateScale",
      "tickSize": 1,
      "nice":"year",
      "tickPadding": 8,
      "grid": true,
      "properties": {
        "labels": {
          "angle": {"value": 30},
          "align": {"value": "left"},
          "baseline": {"value": "middle"}
        }
      }
    },
    {
      "type": "y",
      "scale": "rType",
      "offset": 0,
      "ticks": {"value": 0},
      "properties": {
        "labels": {
          "offset": {"value": 290},
          "angle": {"value": 0},
          "dy": {"value": 0},
          "text": {"scale": "typeLabels"}
        }
      }
    }
  ],
  "marks": [
    {
      "type": "group",
      "from": {
        "data": "table",
        "transform": [
          {
            "type": "stack",
            "groupby": ["startDate","endDate","reqType"],
            "field": "value",
            "sortby": ["-complied"]
          },
          {
            "type": "facet",
            "groupby": ["reqType","startDate","endDate"]
          }
        ]
      },
      "marks": [
        {
          "type": "group",
          "from": {
            "transform": [{"type": "facet","groupby": ["reqType"]}]
          },
          "properties": {
            "enter": {
              "y": {
                "scale": "rType",
                "field": "reqType",
                "offset": 2
              },
              "height": {"scale": "rType","band": true,"offset": -2}
            }
          },
          "scales": [
            {
              "name": "barScale",
              "type": "linear",
              "domain": {"data": "agg","field": "sum_value"},
              "range": "height",
              "nice": true
            }
          ],
          "marks": [
            {
              "type": "rect",
              "properties": {
                "enter": {
                  "x": {
                    "scale": "dateScale",
                    "field": "startDate",
                    "offset": 2
                  },
                  "x2": {
                    "scale": "dateScale",
                    "field": "endDate",
                    "offset": -1
                  },
                  "y": {
                    "scale": "barScale",
                    "field": "layout_end",
                    "offset": 1
                  },
                  "y2": {
                    "scale": "barScale",
                    "field": "layout_start",
                    "offset": -1
                  },
                  "fill": {"scale": "color","field": "complied"}
                }
              }
            }
          ]
        }
      ]
    },
    {
      "type": "rect",
      "from": {
        "data": "table",
        "transform": [{"type": "facet","groupby": ["reqType"]}]
      },
      "properties": {
        "enter": {
          "y": {"scale": "rType","field": "reqType","offset": 0},
          "x": {"value": 0},
          "x2": {"field": {"group": "width"}},
          "height": {"value": 1},
          "fill": {"value": "#000"}
        }
      }
    }
  ],
  "legends": [
    {
      "fill": "color",
      "title": "",
      "properties": {"labels": {"text": {"scale": "cLabels"}}}
    }
  ]
}
	]]

	return specHead .. valuesJson .. specAfterValues .. requestTypeLabels
		.. specAfterTypeLabels .. complianceLabels .. specTail
end

-- Public interface of the module.
local p = {}

-- Return the raw graph specification for a country, without wrapping it in
-- a <graph> tag.
function p.testGraph( countryName )
	return generateGraph( countryName )
end

-- Entry point taking a frame: reads the "countryName" argument and returns
-- the result of the 'graph' extension tag whose content is that country's
-- graph specification.
function p.graph( frame )
	local requestedCountry = frame.args.countryName
	local currentFrame = mw.getCurrentFrame()

	return currentFrame:extensionTag( {
		name = 'graph',
		content = generateGraph( requestedCountry )
	} )
end

return p