Less_Parser: Inline and optimize heavily called MatchQuoted()

The method was not used anywhere else and might as well be inlined. This does have an actual effect in real-world scenarios. On my local machine, MediaWiki's ResourcesTest (parses hundreds of Less files in PHPUnit) runs approx. 8% faster just because of this change.

Some details:
* Function call overhead is surprisingly significant in PHP. This method was called 200,000 times. This adds up.
* The main change is the initial check for the first character. It short-circuits in about 50% of the cases, where everything below can be skipped.
* Inlining the code makes some more minor adjustments and optimizations possible.
* The $isEscaped code path is super rare and should be skipped as fast as possible if it doesn't apply.

Bug: T225730
Change-Id: Ie7c31f375ea99fb38e61b60eb99dbbd505315ec8
wikimedia · Jul 23, 2023 · e6e963a · e6e963a
1 parent 69d49fd
commit e6e963a
Showing 1 changed file with 37 additions and 41 deletions.
diff --git a/lib/Less/Parser.php b/lib/Less/Parser.php
@@ -815,39 +815,6 @@ private function MatchReg( $tok ) {
 		}
 	}
 
-	/**
-	 * @return null|string
-	 * @see less-2.5.3.js#parserInput.$quoted
-	 */
-	private function MatchQuoted() {
-		$startChar = $this->input[$this->pos] ?? null;
-		if ( $startChar !== "'" && $startChar !== '"' ) {
-			return;
-		}
-
-		$i = 1;
-		while ( $this->pos + $i < $this->input_len ) {
-			// Optimization: Skip over irrelevant chars without slow loop
-			$i += strcspn( $this->input, "\n\r$startChar\\", $this->pos + $i );
-			$nextChar = $this->input[$this->pos + $i];
-			$i++;
-			switch ( $nextChar ) {
-			case "\\":
-				$i++;
-				break;
-			case "\r":
-			case "\n":
-				return;
-			case $startChar:
-				$matched = substr( $this->input, $this->pos, $i );
-				$this->skipWhitespace( $i );
-				return $matched;
-			}
-		}
-
-		return null;
-	}
-
 	/**
 	 * Same as match(), but don't change the state of the parser,
 	 * just return the match.
@@ -1040,20 +1007,49 @@ private function parseComments() {
 	 * @see less-2.5.3.js#entities.quoted
 	 */
 	private function parseEntitiesQuoted() {
-		$index = $this->pos;
+		// Optimization: Determine match potential without save()/restore() overhead
+		// Optimization: Inline MatchChar() here, with its skipWhitespace(1) call below
+		$startChar = $this->input[$this->pos] ?? null;
+		$isEscaped = $startChar === '~';
+		if ( !$isEscaped && $startChar !== "'" && $startChar !== '"' ) {
+			return;
+		}
 
+		$index = $this->pos;
 		$this->save();
 
-		$isEscaped = $this->MatchChar( '~' ) !== null;
-		$str = $this->MatchQuoted();
-		if ( $str === null ) {
-			$this->restore();
-			return;
+		if ( $isEscaped ) {
+			$this->skipWhitespace( 1 );
+			$startChar = $this->input[$this->pos] ?? null;
+			if ( $startChar !== "'" && $startChar !== '"' ) {
+				$this->restore();
+				return;
+			}
 		}
 
-		$this->forget();
+		// Optimization: Inline matching of quotes for 8% overall speed up
+		// on large LESS files. https://gerrit.wikimedia.org/r/939727
+		// @see less-2.5.3.js#parserInput.$quoted
+		$i = 1;
+		while ( $this->pos + $i < $this->input_len ) {
+			// Optimization: Skip over irrelevant chars without slow loop
+			$i += strcspn( $this->input, "\n\r$startChar\\", $this->pos + $i );
+			switch ( $this->input[$this->pos + $i++] ) {
+				case "\\":
+					$i++;
+					break;
+				case "\r":
+				case "\n":
+					break 2;
+				case $startChar:
+					$str = substr( $this->input, $this->pos, $i );
+					$this->skipWhitespace( $i );
+					$this->forget();
+					return new Less_Tree_Quoted( $str[0], substr( $str, 1, -1 ), $isEscaped, $index, $this->env->currentFileInfo );
+			}
+		}
 
-		return new Less_Tree_Quoted( $str[0], substr( $str, 1, -1 ), $isEscaped, $index, $this->env->currentFileInfo );
+		$this->restore();
 	}
 
 	/**