summaryrefslogtreecommitdiff
blob: 042039f02bca9c09eca591e06a8e0f3db3921685 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
<?php
/**
 * Script to gather translator stats.
 *
 * @author Niklas Laxström
 * @license GPL-2.0+
 * @file
 */

// Resolve $IP: use the MW_INSTALL_PATH environment variable when it is set,
// otherwise fall back to the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = "$dir/../../..";
}
// Load the Maintenance base class that the script class below extends.
require_once "$IP/maintenance/Maintenance.php";

/**
 * Maintenance script that reads the tab-separated output of
 * translator-stats.php and prints one summary row per registration month.
 */
class TSP extends Maintenance {
	/**
	 * Registers the script description and its single positional argument
	 * (the TSV file to process).
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to calculate monthly stats about tsv data produced ' .
			'by translator-stats.php.';
		$this->addArg(
			'file',
			'tsv file to process'
		);
	}

	/**
	 * Returns the median of a list of numbers.
	 *
	 * @param array $a Numeric values; the caller's array is not modified
	 *   because PHP passes arrays by value.
	 * @return int|float 0 for an empty list, the middle element for an odd
	 *   number of values, and the mean of the two middle elements for an
	 *   even number of values.
	 */
	public function median( $a ) {
		$len = count( $a );
		if ( $len === 0 ) {
			return 0;
		}

		sort( $a );
		// Index of the middle element (odd length) or of the upper of the
		// two middle elements (even length).
		$mid = (int)floor( $len / 2 );

		if ( $len % 2 === 1 ) {
			return $a[$mid];
		}

		return ( $a[$mid - 1] + $a[$mid] ) / 2;
	}

	/**
	 * Reads the TSV file, groups its rows by registration month and echoes
	 * a tab-separated summary table to standard output.
	 *
	 * Each input row is read as: name, registration timestamp, edit count,
	 * translator flag, promotion timestamp, promotion method.
	 */
	public function execute() {
		$file = $this->getArg( 0 );
		$handle = fopen( $file, 'r' );
		if ( $handle === false ) {
			// Without a readable file there is nothing to summarise.
			$this->error( "Unable to open $file for reading" );
			return;
		}

		// remove heading
		fgets( $handle );

		$data = array();
		while ( ( $l = fgets( $handle ) ) !== false ) {
			// Strip both LF and CRLF endings so the last column compares cleanly.
			$l = rtrim( $l, "\r\n" );
			if ( $l === '' ) {
				// A blank line would otherwise create a bogus "--01" period.
				continue;
			}

			// Pad short rows so that every row exposes all six columns.
			$fields = array_pad( explode( "\t", $l ), 6, '' );
			$reg = $fields[1];
			// Bucket key built from the first four and next two characters of
			// the registration timestamp (presumably YYYYMMDDHHMMSS - confirm
			// against translator-stats.php).
			$month = substr( $reg, 0, 4 ) . '-' . substr( $reg, 4, 2 ) . '-01';
			$data[$month][] = $fields;
		}

		fclose( $handle );

		ksort( $data );

		echo "period\tnew\tpromoted\tgood\tmedian promotion time\t" .
		"avg promotion time\tsandbox approval rate\n";

		foreach ( $data as $key => $period ) {
			$total = 0;
			$promoted = 0;
			$good = 0;
			// Differences between promotion and registration timestamps.
			$delay = array();
			$avg = 'N/A';
			// One boolean per sandbox user: true when a promotion time is present.
			$sbar = array();

			foreach ( $period as $p ) {
				list( $name, $reg, $edits, $translator, $promtime, $method ) = $p;
				$total++;
				if ( $translator === 'translator' ) {
					$promoted++;
				}

				// Explicit cast keeps the comparison numeric on every PHP version.
				if ( (int)$edits > 100 ) {
					$good++;
				}

				if ( $promtime ) {
					$delay[] = wfTimestamp( TS_UNIX, $promtime ) - wfTimestamp( TS_UNIX, $reg );
				}

				if ( $method === 'sandbox' ) {
					$sbar[] = (bool)$promtime;
				}
			}

			// Delays are divided by 3600 before printing (seconds to hours,
			// assuming TS_UNIX yields seconds).
			$median = round( $this->median( $delay ) / 3600 );
			if ( count( $delay ) ) {
				$avg = round( array_sum( $delay ) / count( $delay ) / 3600 );
			}

			if ( $sbar === array() ) {
				$sbar = 'N/A';
			} else {
				// Fraction of sandbox users that have a promotion time.
				$sbar = count( array_filter( $sbar ) ) / count( $sbar );
			}

			echo "$key\t$total\t$promoted\t$good\t$median\t$avg\t$sbar\n";
		}
	}
}

// Name of the script class defined in this file. Presumably read by the file
// included on the next line to decide which class to run - confirm against
// Maintenance.php.
$maintClass = 'TSP';
// RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it is
// provided by the Maintenance.php include at the top of this script.
require_once RUN_MAINTENANCE_IF_MAIN;