1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
|
<?php
declare( strict_types = 1 );
namespace MediaWiki\Extension\Translate\Statistics;
use Language;
use MediaWiki\Config\ServiceOptions;
use MessageGroups;
use TranslateUtils;
use Wikimedia\ObjectFactory;
use const TS_MW;
/**
 * Provides translation stats data.
 *
 * Looks up the stats provider configured for a graph type, runs its query and
 * buckets the resulting rows by formatted date label so that the result can be
 * fed to a graph drawer.
 *
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2020.09
 */
class TranslationStatsDataProvider {
	public const CONSTRUCTOR_OPTIONS = [
		'TranslateStatsProviders'
	];

	private const SECONDS_PER_HOUR = 3600;
	private const SECONDS_PER_DAY = 86400;

	/** @var ObjectFactory */
	private $objectFactory;
	/** @var ServiceOptions */
	private $options;

	/**
	 * @param ServiceOptions $options Must provide the 'TranslateStatsProviders' option
	 * @param ObjectFactory $objectFactory Used to instantiate the configured stats providers
	 */
	public function __construct( ServiceOptions $options, ObjectFactory $objectFactory ) {
		$this->options = $options;
		$this->objectFactory = $objectFactory;
	}

	/**
	 * Returns the configured provider specifications keyed by graph type.
	 * Entries with a falsy specification are dropped by array_filter().
	 */
	private function getGraphSpecifications(): array {
		return array_filter( $this->options->get( 'TranslateStatsProviders' ) );
	}

	/**
	 * Returns the names of the graph types that have a usable provider specification.
	 * @return string[]
	 */
	public function getGraphTypes(): array {
		return array_keys( $this->getGraphSpecifications() );
	}

	/**
	 * Fetches and preprocesses graph data that can be fed to graph drawer.
	 *
	 * @param TranslationStatsGraphOptions $opts Read for 'count', 'start', 'days' and 'scale'
	 * @param Language $language Used to format the date labels and to localise series labels
	 * @return array Two element list [ $labels, $data ]:
	 *  - $labels: series labels in display form; empty when the provider only
	 *    reports the single dummy label 'all'
	 *  - $data: map of date label => list of counters, one counter per series,
	 *    indexed like the labels returned by the provider. When no fixed start
	 *    is given the last date label has '*' appended to mark it as incomplete.
	 */
	public function getGraphData( TranslationStatsGraphOptions $opts, Language $language ): array {
		$dbr = wfGetDB( DB_REPLICA );

		$so = $this->getStatsProvider( $opts->getValue( 'count' ), $opts );
		$scale = $opts->getValue( 'scale' );

		$fixedStart = $opts->getValue( 'start' ) !== '';

		$now = time();
		$period = self::SECONDS_PER_DAY * $opts->getValue( 'days' );

		if ( $fixedStart ) {
			$cutoff = (int)wfTimestamp( TS_UNIX, $opts->getValue( 'start' ) );
		} else {
			$cutoff = $now - $period;
		}
		$cutoff = self::roundTimestampToCutoff( $scale, $cutoff, 'earlier' );

		$start = $cutoff;

		// With a fixed start the range is closed; otherwise it is open-ended up to "now"
		if ( $fixedStart ) {
			$end = self::roundTimestampToCutoff( $scale, $start + $period, 'later' ) - 1;
		} else {
			$end = null;
		}

		// The provider fills in the query parts; they are handed over as separate variables
		$tables = [];
		$fields = [];
		$conds = [];
		$type = __METHOD__;
		$options = [];
		$joins = [];

		$so->preQuery( $tables, $fields, $conds, $type, $options, $joins, $start, $end );
		$res = $dbr->select( $tables, $fields, $conds, $type, $options, $joins );
		wfDebug( __METHOD__ . "-queryend\n" );

		// Start processing the data
		$dateFormat = $so->getDateFormat();
		$increment = self::getIncrement( $scale );

		$labels = $so->labels();
		// One zeroed counter per series, keyed like the provider's labels
		$defaults = array_fill_keys( array_keys( $labels ), 0 );

		$data = [];
		// Allow 10 seconds in the future for processing time
		$lastValue = $end ?? ( $now + 10 );
		while ( $cutoff <= $lastValue ) {
			$date = $language->sprintfDate( $dateFormat, wfTimestamp( TS_MW, $cutoff ) );
			$cutoff += $increment;
			$data[$date] = $defaults;
		}
		// Ensure $lastValue is within range, in case the loop above jumped over it
		$data[$language->sprintfDate( $dateFormat, wfTimestamp( TS_MW, $lastValue ) )] = $defaults;

		// Processing
		$labelToIndex = array_flip( $labels );

		foreach ( $res as $row ) {
			$indexLabels = $so->indexOf( $row );
			if ( $indexLabels === false ) {
				continue;
			}

			// The date label depends only on the row, so compute it once per row
			$date = $language->sprintfDate( $dateFormat, $so->getTimestamp( $row ) );
			// Ignore values outside range
			if ( !isset( $data[$date] ) ) {
				continue;
			}

			foreach ( $indexLabels as $i ) {
				if ( !isset( $labelToIndex[$i] ) ) {
					continue;
				}

				$data[$date][$labelToIndex[$i]]++;
			}
		}

		// Don't display dummy label
		if ( count( $labels ) === 1 && $labels[0] === 'all' ) {
			$labels = [];
		}

		// Iterate by value and write back by key, so that no reference into
		// $labels is left behind in the returned array.
		foreach ( $labels as $index => $label ) {
			$labels[$index] = self::formatLabel( $label, $language );
		}

		// Indicator that the last value is not full
		if ( $end === null ) {
			// Warning: do not use array_splice, which does not preserve numerical keys
			$last = end( $data );
			$key = key( $data );
			unset( $data[$key] );
			$data[ "$key*" ] = $last;
		}

		return [ $labels, $data ];
	}

	/**
	 * Converts a raw series label of the form "group@languagecode" into display form.
	 *
	 * Labels without '@' are returned unchanged. Either side of the '@' may be
	 * empty, in which case only the other side is formatted.
	 *
	 * @param string $label Raw label as returned by the stats provider
	 * @param Language $language Language in which language names are shown
	 * @return string Display label; a group label is passed through as returned by the group
	 */
	private static function formatLabel( string $label, Language $language ) {
		if ( strpos( $label, '@' ) === false ) {
			return $label;
		}

		[ $groupId, $code ] = explode( '@', $label, 2 );

		$languageLabel = '';
		if ( $code ) {
			$languageLabel = TranslateUtils::getLanguageName( $code, $language->getCode() ) . " ($code)";
		}

		$groupLabel = '';
		if ( $groupId ) {
			$group = MessageGroups::getGroup( $groupId );
			// Fall back to the raw id when the group cannot be loaded
			$groupLabel = $group ? $group->getLabel() : $groupId;
		}

		if ( $code && $groupId ) {
			return "$groupLabel @ $languageLabel";
		}
		if ( $code ) {
			return $languageLabel;
		}
		if ( $groupId ) {
			return $groupLabel;
		}

		// Nothing usable on either side of the '@'
		return $label;
	}

	/**
	 * Instantiates the stats provider configured for the given graph type.
	 *
	 * @param string $type One of the values returned by getGraphTypes()
	 * @param TranslationStatsGraphOptions $opts Passed to the provider as extra constructor argument
	 * @return TranslationStatsInterface
	 * @throws \InvalidArgumentException If no provider is configured for $type
	 * @noinspection PhpIncompatibleReturnTypeInspection
	 */
	private function getStatsProvider( string $type, TranslationStatsGraphOptions $opts ): TranslationStatsInterface {
		$specs = $this->getGraphSpecifications();
		if ( !isset( $specs[$type] ) ) {
			// Fail with a clear message instead of an undefined index warning
			throw new \InvalidArgumentException( "Unknown translation stats graph type: $type" );
		}

		return $this->objectFactory->createObject(
			$specs[$type],
			[
				'allowClassName' => true,
				'extraArgs' => [ $opts ],
			]
		);
	}

	/**
	 * Rounds a timestamp to the start of a period in the given scale, like the
	 * full hour, midnight, monday or first day of the month or year.
	 *
	 * All calculations are done in UTC: period boundaries are detected with
	 * gmdate() so that they agree with the modulo based rounding to midnight.
	 *
	 * For 'hours' and 'days' an already aligned timestamp is moved one full
	 * period forward when rounding 'later'. Unknown scales return the
	 * timestamp unchanged.
	 *
	 * @param string $scale One of 'hours', 'days', 'weeks', 'months', 'years'
	 * @param int $cutoff Unix timestamp to round
	 * @param string $direction 'earlier' to round backwards, anything else to round forwards
	 * @return int Rounded Unix timestamp
	 */
	private static function roundTimestampToCutoff(
		string $scale, int $cutoff, string $direction = 'earlier'
	): int {
		$dir = $direction === 'earlier' ? -1 : 1;

		/* Ensure that the first item in the graph has full data even
		 * if it doesn't align with the given 'days' boundary */
		if ( $scale === 'hours' ) {
			return $cutoff + self::roundingAddition( $cutoff, self::SECONDS_PER_HOUR, $dir );
		}
		if ( $scale === 'days' ) {
			return $cutoff + self::roundingAddition( $cutoff, self::SECONDS_PER_DAY, $dir );
		}

		/* Date format character and the value it has on the first day of the
		 * period. Here we assume that week starts on monday, which does not
		 * always hold true. */
		$boundaries = [
			'weeks' => [ 'D', 'Mon' ],
			'months' => [ 'j', '1' ],
			'years' => [ 'z', '0' ],
		];

		if ( isset( $boundaries[$scale] ) ) {
			[ $format, $expected ] = $boundaries[$scale];
			// Go day by day in the requested direction until we hit the boundary day
			while ( gmdate( $format, $cutoff ) !== $expected ) {
				$cutoff += $dir * self::SECONDS_PER_DAY;
			}
			// Round down to the start of that day
			$cutoff -= ( $cutoff % self::SECONDS_PER_DAY );
		}

		return $cutoff;
	}

	/**
	 * Returns the number of seconds to add to $ts to reach a multiple of $amount.
	 *
	 * @param int $ts Unix timestamp
	 * @param int $amount Length of the period in seconds
	 * @param int $dir -1 to get a non-positive addition to the previous multiple,
	 *  anything else to get a positive addition to the next multiple
	 * @return int
	 */
	private static function roundingAddition( int $ts, int $amount, int $dir ): int {
		$remainder = $ts % $amount;

		return $dir === -1 ? -1 * $remainder : $amount - $remainder;
	}

	/**
	 * Returns an increment in seconds for a given scale.
	 * The increment must be small enough that we will hit every item in the
	 * scale when using different multiples of the increment. It should be
	 * large enough to avoid hitting the same item multiple times.
	 *
	 * @param string $scale One of 'hours', 'days', 'weeks', 'months', 'years'
	 * @return int Increment in seconds; one day for unknown scales
	 */
	private static function getIncrement( string $scale ): int {
		$increments = [
			'years' => 350 * self::SECONDS_PER_DAY,
			/* We use increment to fill up the values. Use number small enough
			 * to ensure we hit each month */
			'months' => 15 * self::SECONDS_PER_DAY,
			'weeks' => 7 * self::SECONDS_PER_DAY,
			'hours' => self::SECONDS_PER_HOUR,
		];

		return $increments[$scale] ?? self::SECONDS_PER_DAY;
	}
}
|