diff --git a/docs/changes/1.x/1.3.0.md b/docs/changes/1.x/1.3.0.md index 6157befb03..a843c80bb1 100644 --- a/docs/changes/1.x/1.3.0.md +++ b/docs/changes/1.x/1.3.0.md @@ -11,6 +11,7 @@ - Word2007 Reader : Support for FormFields by [@vincentKool](https://github.com/vincentKool) in [#2653](https://github.com/PHPOffice/PHPWord/pull/2653) - RTF Writer : Support for Table Border Style fixing [#345](https://github.com/PHPOffice/PHPWord/issues/345) by [@Progi1984](https://github.com/Progi1984) in [#2656](https://github.com/PHPOffice/PHPWord/pull/2656) - Word2007 Reader: Support the page break () by [@stanolacko](https://github.com/stanolacko) in [#2662](https://github.com/PHPOffice/PHPWord/pull/2662) +- MsDoc Reader: Support for UTF-8 characters by [@Progi1984] fixing [#881](https://github.com/PHPOffice/PHPWord/issues/881), [#1454](https://github.com/PHPOffice/PHPWord/issues/1454), [#1817](https://github.com/PHPOffice/PHPWord/issues/1817), [#1927](https://github.com/PHPOffice/PHPWord/issues/1927), [#2383](https://github.com/PHPOffice/PHPWord/issues/2383), [#2565](https://github.com/PHPOffice/PHPWord/issues/2565) in [#2664](https://github.com/PHPOffice/PHPWord/pull/2664) ### Bug fixes diff --git a/src/PhpWord/Reader/MsDoc.php b/src/PhpWord/Reader/MsDoc.php index 2599e42350..72b9af6c59 100644 --- a/src/PhpWord/Reader/MsDoc.php +++ b/src/PhpWord/Reader/MsDoc.php @@ -1279,10 +1279,12 @@ private function readRecordPlcfBtePapx(): void break; } $strLen = $arrayRGFC[$key + 1] - $arrayRGFC[$key] - 1; - for ($inc = 0; $inc < $strLen; ++$inc) { - $byte = self::getInt1d($this->dataWorkDocument, $arrayRGFC[$key] + $inc); + for ($inc = 0; $inc < ($strLen * 2); ++$inc) { + $byte = self::getInt2d($this->dataWorkDocument, $arrayRGFC[$key] + ($inc * 2)); if ($byte > 0) { - $string .= chr($byte); + $string .= mb_chr($byte, 'UTF-8'); + } else { + break; } } } @@ -2331,7 +2333,7 @@ private function generatePhpWord(): void foreach ($this->arrayParagraphs as $itmParagraph) { $textPara = $itmParagraph; foreach ($this->arrayCharacters as $oCharacters) { - $subText = substr($textPara, $oCharacters->pos_start, $oCharacters->pos_len); + $subText = mb_substr($textPara, $oCharacters->pos_start, $oCharacters->pos_len); $subText = str_replace(chr(13), PHP_EOL, $subText); $arrayText = explode(PHP_EOL, $subText); if (end($arrayText) == '') { diff --git a/tests/PhpWordTests/Reader/MsDocTest.php b/tests/PhpWordTests/Reader/MsDocTest.php index b62d545de0..b243fccfee 100644 --- a/tests/PhpWordTests/Reader/MsDocTest.php +++ b/tests/PhpWordTests/Reader/MsDocTest.php @@ -18,7 +18,9 @@ namespace PhpOffice\PhpWordTests\Reader; use Exception; +use PhpOffice\PhpWord\Element\Text; use PhpOffice\PhpWord\IOFactory; +use PhpOffice\PhpWord\PhpWord; use PhpOffice\PhpWord\Reader\MsDoc; /** @@ -50,14 +52,20 @@ public function testCanReadFailed(): void self::assertFalse($object->canRead($filename)); } - /** - * Load. - */ - public function testLoad(): void + public function testLoadBasic(): void { $filename = __DIR__ . '/../_files/documents/reader.doc'; $phpWord = IOFactory::load($filename, 'MsDoc'); - self::assertInstanceOf('PhpOffice\\PhpWord\\PhpWord', $phpWord); + self::assertInstanceOf(PhpWord::class, $phpWord); + + $sections = $phpWord->getSections(); + self::assertCount(1, $sections); + $elements = $sections[0]->getElements(); + self::assertArrayHasKey(0, $elements); + /** @var Text $element0 */ + $element0 = $elements[0]; + self::assertInstanceOf(Text::class, $element0); + self::assertEquals('Welcome to PhpWord', $element0->getText()); } public function testLoadHalfPointFont(): void @@ -76,6 +84,54 @@ public function testLoadHalfPointFont(): void } } + public function testLoadChinese(): void + { + $filename = __DIR__ . '/../_files/documents/docChinese.doc'; + $phpWord = IOFactory::load($filename, 'MsDoc'); + self::assertInstanceOf(PhpWord::class, $phpWord); + + $sections = $phpWord->getSections(); + self::assertCount(1, $sections); + $elements = $sections[0]->getElements(); + self::assertArrayHasKey(0, $elements); + /** @var Text $element0 */ + $element0 = $elements[0]; + self::assertInstanceOf(Text::class, $element0); + self::assertEquals('OKKI AI 客户案例', $element0->getText()); + } + + public function testLoadCzech(): void + { + $filename = __DIR__ . '/../_files/documents/docCzech.doc'; + $phpWord = IOFactory::load($filename, 'MsDoc'); + self::assertInstanceOf(PhpWord::class, $phpWord); + + $sections = $phpWord->getSections(); + self::assertCount(1, $sections); + $elements = $sections[0]->getElements(); + self::assertArrayHasKey(0, $elements); + /** @var Text $element0 */ + $element0 = $elements[0]; + self::assertInstanceOf(Text::class, $element0); + self::assertEquals('Příliš žluťoučký kůň pěl ďábelské ódy', $element0->getText()); + } + + public function testLoadSlovak(): void + { + $filename = __DIR__ . '/../_files/documents/docSlovak.doc'; + $phpWord = IOFactory::load($filename, 'MsDoc'); + self::assertInstanceOf(PhpWord::class, $phpWord); + + $sections = $phpWord->getSections(); + self::assertCount(1, $sections); + $elements = $sections[0]->getElements(); + self::assertArrayHasKey(0, $elements); + /** @var Text $element0 */ + $element0 = $elements[0]; + self::assertInstanceOf(Text::class, $element0); + self::assertEquals('Pondelok', $element0->getText()); + } + /** * Test exception on not existing file. */ diff --git a/tests/PhpWordTests/_files/documents/docChinese.doc b/tests/PhpWordTests/_files/documents/docChinese.doc new file mode 100644 index 0000000000..db245953e5 Binary files /dev/null and b/tests/PhpWordTests/_files/documents/docChinese.doc differ diff --git a/tests/PhpWordTests/_files/documents/docCzech.doc b/tests/PhpWordTests/_files/documents/docCzech.doc new file mode 100644 index 0000000000..8def81b13e Binary files /dev/null and b/tests/PhpWordTests/_files/documents/docCzech.doc differ diff --git a/tests/PhpWordTests/_files/documents/docSlovak.doc b/tests/PhpWordTests/_files/documents/docSlovak.doc new file mode 100644 index 0000000000..dede581e55 Binary files /dev/null and b/tests/PhpWordTests/_files/documents/docSlovak.doc differ