summaryrefslogtreecommitdiff
blob: f14dba66bb2abb16eb0efa720e8ed8cbf9ab89c9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
<?php
/**
 * @file
 * @license GPL-2.0-or-later
 */

namespace MediaWiki\Extension\Translate\Utilities;

use GettextPluralException;
use InvalidArgumentException;
use TranslateUtils;

/**
 * Utilities for Gettext plural rules and for the {{PLURAL:GETTEXT|...}} markup that
 * packs all plural forms of a message into a single string.
 *
 * @since 2019.09
 */
class GettextPlural {
	/** Opening delimiter of a plural markup instance. */
	private const PRE = '{{PLURAL:GETTEXT|';
	/** Closing delimiter of a plural markup instance. */
	private const POST = '}}';

	/**
	 * Returns Gettext plural rule for given language.
	 *
	 * The rules are read from data/plural-gettext.txt, which holds one
	 * "<language code><TAB><rule>" entry per line. The message documentation
	 * language always gets a fixed single-form rule.
	 *
	 * @param string $code Language tag in MediaWiki internal format.
	 * @return string Empty string if no plural rule found
	 */
	public static function getPluralRule( $code ) {
		global $wgTranslateDocumentationLanguageCode;

		if ( $code === $wgTranslateDocumentationLanguageCode ) {
			return 'nplurals=1; plural=0;';
		}

		$rulefile = __DIR__ . '/../../data/plural-gettext.txt';
		$rules = file_get_contents( $rulefile );
		if ( $rules === false ) {
			// Unreadable data file: behave as if no rule is known
			return '';
		}

		foreach ( explode( "\n", $rules ) as $line ) {
			// Tolerate CRLF line endings so that "\r" does not leak into the rule
			$line = rtrim( $line, "\r" );
			if ( trim( $line ) === '' ) {
				continue;
			}

			$fields = explode( "\t", $line );
			if ( count( $fields ) < 2 ) {
				// Malformed line without a tab separated rule; nothing to return from it
				continue;
			}

			[ $rulecode, $rule ] = $fields;
			if ( $rulecode === $code ) {
				return $rule;
			}
		}

		return '';
	}

	/**
	 * Returns how many plural forms are expected by a given plural rule.
	 *
	 * @param string $rule Gettext style plural rule.
	 * @return int
	 * @throws InvalidArgumentException If the rule has no "nplurals=<digits>" followed by ";"
	 */
	public static function getPluralCount( $rule ) {
		$m = [];
		$ok = preg_match( '/nplurals=([0-9]+).*;/', $rule, $m );
		if ( !$ok ) {
			throw new InvalidArgumentException( "Rule $rule is malformed" );
		}
		return (int)$m[ 1 ];
	}

	/**
	 * Quick way to check if the text contains plural syntax.
	 *
	 * Only looks for the opening delimiter; it does not verify that the markup is closed.
	 *
	 * @param string $text
	 * @return bool
	 */
	public static function hasPlural( $text ) {
		return strpos( $text, self::PRE ) !== false;
	}

	/**
	 * Format plural forms as single string suitable for translation.
	 *
	 * @param string[] $forms
	 * @return string The forms joined with "|" and wrapped in the plural markup delimiters
	 */
	public static function flatten( array $forms ) {
		return self::PRE . implode( '|', $forms ) . self::POST;
	}

	/**
	 * Format translation with plural forms as array of forms.
	 *
	 * Reverse of flatten. Do note that A may be != flatten( unflatten( A ) ) because
	 * translators can place part of the text outside the plural markup or use multiple
	 * instances of the markup.
	 *
	 * @param string $text
	 * @param int $expectedPluralCount
	 * @return string[] Exactly $expectedPluralCount fully expanded forms
	 */
	public static function unflatten( $text, $expectedPluralCount ) {
		[ $template, $instanceMap ] = self::parsePluralForms( $text );
		return self::expandTemplate( $template, $instanceMap, $expectedPluralCount );
	}

	/**
	 * Replaces problematic markup which can confuse our plural syntax markup with placeholders
	 *
	 * @param string $text
	 * @return array [ string $text, array $map ] where $map maps each placeholder back to
	 *  the markup it replaced
	 */
	private static function armour( $text ) {
		// |/| is commonly used in KDE to support inflections. It needs to be escaped
		// to avoid it messing up the plural markup.
		$replacements = [
			'|/|' => TranslateUtils::getPlaceholder(),
		];
		// {0} is a common variable format
		preg_match_all( '/\{\d+\}/', $text, $matches );
		foreach ( $matches[0] as $m ) {
			$replacements[$m] = TranslateUtils::getPlaceholder();
		}

		$text = strtr( $text, $replacements );
		$map = array_flip( $replacements );

		return [ $text, $map ];
	}

	/**
	 * Reverse of armour.
	 *
	 * @param string $text
	 * @param array $map Map returned by armour.
	 * @return string
	 */
	private static function unarmour( $text, array $map ) {
		return strtr( $text, $map );
	}

	/**
	 * Parses plural markup into a structure form.
	 *
	 * Each plural markup instance in the text is replaced with a placeholder in the
	 * returned template, and the instance's forms are stored under that placeholder.
	 *
	 * @param string $text
	 * @return array [ string $template, array $instanceMap ] where $instanceMap maps
	 *  placeholder => string[] forms
	 * @throws GettextPluralException If the plural regular expression fails
	 */
	public static function parsePluralForms( $text ) {
		$m = [];
		$pre = preg_quote( self::PRE, '/' );
		$post = preg_quote( self::POST, '/' );

		[ $armouredText, $armourMap ] = self::armour( $text );

		// U: match lazily up to the nearest closing delimiter; s: forms may span lines
		$ok = preg_match_all( "/$pre(.*)$post/Us", $armouredText, $m );
		if ( $ok === false ) {
			throw new GettextPluralException( "Plural regular expression failed for text: $text" );
		}

		$template = $armouredText;
		$instanceMap = [];

		foreach ( $m[0] as $instanceIndex => $instanceText ) {
			$ph = TranslateUtils::getPlaceholder();

			// Replace only the first remaining occurrence, so that identical instances each
			// get their own placeholder. This is done with plain string functions: a regular
			// expression built from the instance text can fail (returning null) and would
			// interpret backreference syntax in the replacement.
			$pos = strpos( $template, $instanceText );
			if ( $pos !== false ) {
				$template = substr_replace( $template, $ph, $pos, strlen( $instanceText ) );
			}

			$instanceForms = explode( '|', $m[ 1 ][ $instanceIndex ] );
			foreach ( $instanceForms as $i => $v ) {
				$instanceForms[ $i ] = self::unarmour( $v, $armourMap );
			}

			$instanceMap[$ph] = $instanceForms;
		}

		$template = self::unarmour( $template, $armourMap );
		return [ $template, $instanceMap ];
	}

	/**
	 * Gives fully expanded forms given a template and parsed plural markup instances.
	 *
	 * @param string $template Text where each plural markup instance is a placeholder
	 * @param array $instanceMap Map of placeholder => string[] forms
	 * @param int $expectedPluralCount
	 * @return string[] One entry per form index from 0 to $expectedPluralCount - 1
	 */
	public static function expandTemplate( $template, array $instanceMap, $expectedPluralCount ) {
		$formArray = [];
		for ( $formIndex = 0; $formIndex < $expectedPluralCount; $formIndex++ ) {
			// Start with the whole string
			$form = $template;

			// Loop over each plural markup instance and replace it with the plural form belonging
			// to the current index
			foreach ( $instanceMap as $ph => $instanceForms ) {
				// For missing forms, fall back to empty text.
				// Extra forms are excluded because $formIndex < $expectedPluralCount
				$replacement = $instanceForms[ $formIndex ] ?? '';
				$form = str_replace( $ph, $replacement, $form );
			}

			$formArray[ $formIndex ] = $form;
		}

		return $formArray;
	}
}