diff --git a/src/XliffUtils/DataRefReplacer.php b/src/XliffUtils/DataRefReplacer.php index 47cd79c..ea16bcc 100644 --- a/src/XliffUtils/DataRefReplacer.php +++ b/src/XliffUtils/DataRefReplacer.php @@ -50,19 +50,16 @@ public function replace( $string ) { } // 2. Replace tags - $toBeEscaped = Strings::isAnEscapedHTML( $string ); - - if ( $this->stringContainsPcTags( $string, $toBeEscaped ) ) { + if ( $this->stringContainsPcTags( $string ) ) { // replace self-closed - $string = $this->replaceSelfClosedPcTags( $string, $toBeEscaped ); + $string = $this->replaceSelfClosedPcTags( $string ); // create a dataRefEnd map // (needed for correct handling of closing tags) $dataRefEndMap = $this->buildDataRefEndMap( $html ); - $string = $this->replaceOpeningPcTags( $string, $toBeEscaped ); - $string = $this->replaceClosingPcTags( $string, $toBeEscaped, $dataRefEndMap ); - $string = ( $toBeEscaped ) ? Strings::escapeOnlyHTMLTags( $string ) : $string; + $string = $this->replaceOpeningPcTags( $string ); + $string = $this->replaceClosingPcTags( $string, $dataRefEndMap ); } return $string; @@ -146,7 +143,7 @@ private function recursiveAddEquivTextToPhTag( $node, $string ) { $value = $this->map[ $b ]; $base64EncodedValue = base64_encode( $value ); - if ( empty( $base64EncodedValue ) || $base64EncodedValue === '' ) { + if ( empty( $base64EncodedValue ) ) { return $string; } @@ -176,27 +173,21 @@ private function recursiveAddEquivTextToPhTag( $node, $string ) { /** * @param $string - * @param $toBeEscaped * * @return bool */ - private function stringContainsPcTags( $string, $toBeEscaped ) { - $regex = ( $toBeEscaped ) ? '/<pc (.*?)>/iu' : '//iu'; - preg_match_all( $regex, $string, $openingPcMatches ); + private function stringContainsPcTags( $string ) { + preg_match_all( '//iu', $string, $openingPcMatches ); return ( isset( $openingPcMatches[ 0 ] ) && count( $openingPcMatches[ 0 ] ) > 0 ); } /** * @param $string - * @param $toBeEscaped * * @return mixed */ - private function replaceSelfClosedPcTags( $string, $toBeEscaped ) { - if ( $toBeEscaped ) { - $string = str_replace( [ '<', '>' ], [ '<', '>' ], $string ); - } + private function replaceSelfClosedPcTags( $string ) { $regex = '/]+?\/>/iu'; preg_match_all( $regex, $string, $selfClosedPcMatches ); @@ -213,10 +204,6 @@ private function replaceSelfClosedPcTags( $string, $toBeEscaped ) { } } - if ( $toBeEscaped ) { - $string = str_replace( [ '<', '>' ], [ '<', '>' ], $string ); - } - return $string; } @@ -295,13 +282,11 @@ private function recursiveCleanFromEquivText( $node, $string ) { * Replace opening tags with correct reference in the $string * * @param string $string - * @param bool $toBeEscaped * * @return string */ - private function replaceOpeningPcTags( $string, $toBeEscaped ) { - $regex = ( $toBeEscaped ) ? '/<pc (.*?)>/iu' : '//iu'; - preg_match_all( $regex, $string, $openingPcMatches ); + private function replaceOpeningPcTags( $string ) { + preg_match_all( '//iu', $string, $openingPcMatches ); foreach ( $openingPcMatches[ 0 ] as $index => $match ) { $attr = HtmlParser::getAttributes( $openingPcMatches[ 1 ][ $index ] ); @@ -318,7 +303,7 @@ private function replaceOpeningPcTags( $string, $toBeEscaped ) { if ( isset( $attr[ 'dataRefStart' ] ) ) { $startOriginalData = $match; // opening - $startValue = $this->map[ $attr[ 'dataRefStart' ] ] ? $this->map[ $attr[ 'dataRefStart' ] ] : 'NULL'; //handling null values in original data map + $startValue = $this->map[ $attr[ 'dataRefStart' ] ] ?: 'NULL'; //handling null values in original data map $base64EncodedStartValue = base64_encode( $startValue ); $base64StartOriginalData = base64_encode( $startOriginalData ); @@ -339,14 +324,12 @@ private function replaceOpeningPcTags( $string, $toBeEscaped ) { * thanks to $dataRefEndMap * * @param string $string - * @param bool $toBeEscaped * @param array $dataRefEndMap * * @return string */ - private function replaceClosingPcTags( $string, $toBeEscaped, $dataRefEndMap = [] ) { - $regex = ( $toBeEscaped ) ? '/<\/pc>/iu' : '/<\/pc>/iu'; - preg_match_all( $regex, $string, $closingPcMatches, PREG_OFFSET_CAPTURE ); + private function replaceClosingPcTags( $string, $dataRefEndMap = [] ) { + preg_match_all( '||iu', $string, $closingPcMatches, PREG_OFFSET_CAPTURE ); $delta = 0; foreach ( $closingPcMatches[ 0 ] as $index => $match ) { diff --git a/tests/DataReplacerTest.php b/tests/DataReplacerTest.php index a99f016..ec199e2 100644 --- a/tests/DataReplacerTest.php +++ b/tests/DataReplacerTest.php @@ -20,8 +20,8 @@ public function can_replace_pc_with_adjacent_angle_brackets() $dataReplacer = new DataRefReplacer($map); - $string = '<pc id="source1" dataRefStart="source1">Age (if exact date is not available</pc><pc id="source2" dataRefStart="source2"> <day,month,year>  </pc><pc id="source3" dataRefStart="source3">or we have work/education history to prove the age difference)</pc>'; - $expected = '<ph id="source1_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSJzb3VyY2UxIiBkYXRhUmVmU3RhcnQ9InNvdXJjZTEiJmd0Ow==" dataRef="source1" equiv-text="base64:YQ=="/>Age (if exact date is not available<ph id="source1_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="source1" equiv-text="base64:YQ=="/><ph id="source2_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSJzb3VyY2UyIiBkYXRhUmVmU3RhcnQ9InNvdXJjZTIiJmd0Ow==" dataRef="source2" equiv-text="base64:Yg=="/> <day,month,year>  <ph id="source2_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="source2" equiv-text="base64:Yg=="/><ph id="source3_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSJzb3VyY2UzIiBkYXRhUmVmU3RhcnQ9InNvdXJjZTMiJmd0Ow==" dataRef="source3" equiv-text="base64:Yw=="/>or we have work/education history to prove the age difference)<ph id="source3_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="source3" equiv-text="base64:Yw=="/>'; + $string = 'Age (if exact date is not available   or we have work/education history to prove the age difference)'; + $expected = 'Age (if exact date is not available   or we have work/education history to prove the age difference)'; $this->assertEquals($expected, $dataReplacer->replace($string)); $this->assertEquals($string, $dataReplacer->restore($expected)); @@ -476,8 +476,8 @@ public function can_replace_and_restore_data_with_pc_test_5() 'd4' => '](http://google.it)', ]; - $string = 'Link semplici: <pc id="1" dataRefEnd="d2" dataRefStart="d1">La Repubblica</pc>'; - $expected = 'Link semplici: <ph id="1_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSIxIiBkYXRhUmVmRW5kPSJkMiIgZGF0YVJlZlN0YXJ0PSJkMSImZ3Q7" dataRef="d1" equiv-text="base64:Ww=="/>La Repubblica<ph id="1_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="d2" equiv-text="base64:XShodHRwOi8vcmVwdWJibGljYS5pdCk="/>'; + $string = 'Link semplici: La Repubblica'; + $expected = 'Link semplici: La Repubblica'; $dataReplacer = new DataRefReplacer($map); @@ -544,8 +544,8 @@ public function can_replace_and_restore_data_with_escaped_nested_pc_tags() 'd3' => '`', ]; - $string = 'Testo libero contenente <pc id="1" canCopy="no" canDelete="no" dataRefEnd="d1" dataRefStart="d1">corsivo</pc>, <pc id="2" canCopy="no" canDelete="no" dataRefEnd="d2" dataRefStart="d2">grassetto</pc>, <pc id="3" canCopy="no" canDelete="no" dataRefEnd="d1" dataRefStart="d1"><pc id="4" canCopy="no" canDelete="no" dataRefEnd="d2" dataRefStart="d2">grassetto + corsivo</pc></pc> e <pc id="5" canCopy="no" canDelete="no" dataRefEnd="d3" dataRefStart="d3">larghezza fissa</pc>.'; - $expected = 'Testo libero contenente <ph id="1_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSIxIiBjYW5Db3B5PSJubyIgY2FuRGVsZXRlPSJubyIgZGF0YVJlZkVuZD0iZDEiIGRhdGFSZWZTdGFydD0iZDEiJmd0Ow==" dataRef="d1" equiv-text="base64:Xw=="/>corsivo<ph id="1_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="d1" equiv-text="base64:Xw=="/>, <ph id="2_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSIyIiBjYW5Db3B5PSJubyIgY2FuRGVsZXRlPSJubyIgZGF0YVJlZkVuZD0iZDIiIGRhdGFSZWZTdGFydD0iZDIiJmd0Ow==" dataRef="d2" equiv-text="base64:Kio="/>grassetto<ph id="2_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="d2" equiv-text="base64:Kio="/>, <ph id="3_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSIzIiBjYW5Db3B5PSJubyIgY2FuRGVsZXRlPSJubyIgZGF0YVJlZkVuZD0iZDEiIGRhdGFSZWZTdGFydD0iZDEiJmd0Ow==" dataRef="d1" equiv-text="base64:Xw=="/><ph id="4_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSI0IiBjYW5Db3B5PSJubyIgY2FuRGVsZXRlPSJubyIgZGF0YVJlZkVuZD0iZDIiIGRhdGFSZWZTdGFydD0iZDIiJmd0Ow==" dataRef="d2" equiv-text="base64:Kio="/>grassetto + corsivo<ph id="4_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="d2" equiv-text="base64:Kio="/><ph id="3_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="d1" equiv-text="base64:Xw=="/> e <ph id="5_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSI1IiBjYW5Db3B5PSJubyIgY2FuRGVsZXRlPSJubyIgZGF0YVJlZkVuZD0iZDMiIGRhdGFSZWZTdGFydD0iZDMiJmd0Ow==" dataRef="d3" equiv-text="base64:YA=="/>larghezza fissa<ph id="5_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="d3" equiv-text="base64:YA=="/>.'; + $string = 'Testo libero contenente corsivo, grassetto, grassetto + corsivo e larghezza fissa.'; + $expected = 'Testo libero contenente corsivo, grassetto, grassetto + corsivo e larghezza fissa.'; $dataReplacer = new DataRefReplacer($map); @@ -579,8 +579,8 @@ public function can_replace_and_restore_data_with_pc_and_ph_real_matecat_tags() 'source1' => '<w:hyperlink r:id="rId6"></w:hyperlink>', ]; - $string = 'This code of conduct sets forth the minimum standards by which Uber’s Driver Partners must adhere when using the Uber app in Czech Republic, in addition to the terms of their services agreement with Uber and the <pc id="source1" dataRefStart="source1" dataRefEnd="source1"><ph id="mtc_2" equiv-text="base64:Jmx0O3BjIGlkPSIxdSIgdHlwZT0iZm10IiBzdWJUeXBlPSJtOnUiJmd0Ow=="/>Uber Community Guidelines<ph id="mtc_3" equiv-text="base64:Jmx0Oy9wYyZndDs="/></pc>.'; - $expected = 'This code of conduct sets forth the minimum standards by which Uber’s Driver Partners must adhere when using the Uber app in Czech Republic, in addition to the terms of their services agreement with Uber and the <ph id="source1_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSJzb3VyY2UxIiBkYXRhUmVmU3RhcnQ9InNvdXJjZTEiIGRhdGFSZWZFbmQ9InNvdXJjZTEiJmd0Ow==" dataRef="source1" equiv-text="base64:Jmx0O3c6aHlwZXJsaW5rIHI6aWQ9InJJZDYiJmd0OyZsdDsvdzpoeXBlcmxpbmsmZ3Q7"/><ph id="mtc_2" equiv-text="base64:Jmx0O3BjIGlkPSIxdSIgdHlwZT0iZm10IiBzdWJUeXBlPSJtOnUiJmd0Ow=="/>Uber Community Guidelines<ph id="mtc_3" equiv-text="base64:Jmx0Oy9wYyZndDs="/><ph id="source1_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="source1" equiv-text="base64:Jmx0O3c6aHlwZXJsaW5rIHI6aWQ9InJJZDYiJmd0OyZsdDsvdzpoeXBlcmxpbmsmZ3Q7"/>.'; + $string = 'This code of conduct sets forth the minimum standards by which Uber’s Driver Partners must adhere when using the Uber app in Czech Republic, in addition to the terms of their services agreement with Uber and the Uber Community Guidelines.'; + $expected = 'This code of conduct sets forth the minimum standards by which Uber’s Driver Partners must adhere when using the Uber app in Czech Republic, in addition to the terms of their services agreement with Uber and the Uber Community Guidelines.'; $dataReplacer = new DataRefReplacer($map); $this->assertEquals($expected, $dataReplacer->replace($string)); @@ -656,8 +656,8 @@ public function can_replace_and_restore_data_with_pc_with_non_standard_character ]; // this string contains ’ - $string = '<pc id="source4" dataRefStart="source4">The rider can’t tell if the driver matched the profile picture.</pc>'; - $expected = '<ph id="source4_1" dataType="pcStart" originalData="Jmx0O3BjIGlkPSJzb3VyY2U0IiBkYXRhUmVmU3RhcnQ9InNvdXJjZTQiJmd0Ow==" dataRef="source4" equiv-text="base64:PGcgaWQ9IjVTdENZWVJ2cU1jMFVBejQiIGN0eXBlPSJ4LWh0bWwtdWwiIFwvPg=="/>The rider can’t tell if the driver matched the profile picture.<ph id="source4_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="source4" equiv-text="base64:PGcgaWQ9IjVTdENZWVJ2cU1jMFVBejQiIGN0eXBlPSJ4LWh0bWwtdWwiIFwvPg=="/>'; + $string = 'The rider can’t tell if the driver matched the profile picture.'; + $expected = 'The rider can’t tell if the driver matched the profile picture.'; $dataReplacer = new DataRefReplacer($map); $this->assertEquals($expected, $dataReplacer->replace($string)); @@ -920,8 +920,8 @@ public function more_tests_with_self_closed_pc_tags() $dataReplacer = new DataRefReplacer($map); - $string = '<pc dataRefStart="source5" id="source5"/><ph dataRef="source6" id="source6"/><ph dataRef="source7" id="source7"/><pc dataRefStart="source8" id="source8">Let’s start!</pc><ph dataRef="source9" id="source9"/><ph dataRef="source10" id="source10"/>'; - $expected = '<ph id="source5" dataType="pcSelf" originalData="PHBjIGRhdGFSZWZTdGFydD0ic291cmNlNSIgaWQ9InNvdXJjZTUiLz4=" dataRef="source5" equiv-text="base64:eC10ZXh0"/><ph dataRef="source6" id="source6" equiv-text="base64:Jmx0O2VwdCBpZD0ic3Bhbl8yIiBcLyZndDs="/><ph dataRef="source7" id="source7" equiv-text="base64:eC1zdHlsZQ=="/><ph id="source8_1" dataType="pcStart" originalData="Jmx0O3BjIGRhdGFSZWZTdGFydD0ic291cmNlOCIgaWQ9InNvdXJjZTgiJmd0Ow==" dataRef="source8" equiv-text="base64:eC10ZXh0"/>Let’s start!<ph id="source8_2" dataType="pcEnd" originalData="Jmx0Oy9wYyZndDs=" dataRef="source8" equiv-text="base64:eC10ZXh0"/><ph dataRef="source9" id="source9" equiv-text="base64:Jmx0O2VwdCBpZD0ic3Bhbl8zIiBcLyZndDs="/><ph dataRef="source10" id="source10" equiv-text="base64:Jmx0O2VwdCBpZD0iYmxvY2tfMCIgXC8mZ3Q7"/>'; + $string = 'Let’s start!'; + $expected = 'Let’s start!'; $this->assertEquals($expected, $dataReplacer->replace($string)); $this->assertEquals($string, $dataReplacer->restore($expected));