From: <se...@us...> - 2010-01-15 09:59:30
|
Revision: 24398 http://tikiwiki.svn.sourceforge.net/tikiwiki/?rev=24398&view=rev Author: sept_7 Date: 2010-01-15 09:59:20 +0000 (Fri, 15 Jan 2010) Log Message: ----------- [MRG] Automatic merge, trunk 24390 to 24397 Modified Paths: -------------- branches/experimental/tiki-log-merge/db/convertscripts/mysql_to_sqlite.php branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/MockMTWrapper.php branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/SentenceAlignments.php branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/UpdatePages.php branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/UpdateSentences.php branches/experimental/tiki-log-merge/lib/search/searchlib-mysql.php branches/experimental/tiki-log-merge/lib/test/auth/MembershipTest.php branches/experimental/tiki-log-merge/lib/wiki-plugins/wikiplugin_scroll.php branches/experimental/tiki-log-merge/templates/tiki.tpl branches/experimental/tiki-log-merge/tiki-admin_keywords.php branches/experimental/tiki-log-merge/tiki-setup_base.php Property Changed: ---------------- branches/experimental/tiki-log-merge/ Property changes on: branches/experimental/tiki-log-merge ___________________________________________________________________ Modified: svn:mergeinfo - /branches/3.0:16998-18916 /branches/4.x:22817-23908 /branches/experimental/declfilter:15787-16031 /branches/experimental/edit_fixup:20923-21573 /branches/experimental/kaltura_integration_two:20962-22011 /branches/experimental/moz-metrics:23452-23480 /branches/experimental/ui-revamp2:16728-16731,16999-17002 /branches/proposals/4.x:24005 /trunk:24185-24390 + /branches/3.0:16998-18916 /branches/4.x:22817-23908 /branches/experimental/declfilter:15787-16031 /branches/experimental/edit_fixup:20923-21573 /branches/experimental/kaltura_integration_two:20962-22011 /branches/experimental/moz-metrics:23452-23480 /branches/experimental/ui-revamp2:16728-16731,16999-17002 /branches/proposals/4.x:24005 /trunk:24185-24397 Modified: branches/experimental/tiki-log-merge/db/convertscripts/mysql_to_sqlite.php =================================================================== --- branches/experimental/tiki-log-merge/db/convertscripts/mysql_to_sqlite.php 2010-01-15 09:47:23 UTC (rev 24397) +++ branches/experimental/tiki-log-merge/db/convertscripts/mysql_to_sqlite.php 2010-01-15 09:59:20 UTC (rev 24398) @@ -153,5 +153,3 @@ // //// save file //file_put_contents($tikiversion.'.to_sqlite.sql', $data); - -?> \ No newline at end of file Modified: branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/MockMTWrapper.php =================================================================== --- branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/MockMTWrapper.php 2010-01-15 09:47:23 UTC (rev 24397) +++ branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/MockMTWrapper.php 2010-01-15 09:59:20 UTC (rev 24398) @@ -1,27 +1,27 @@ -<?php - -require_once 'SentenceAlignments.php'; -require_once 'lib/core/lib/Multilingual/MachineTranslation/GoogleTranslateWrapper.php'; - -class Multilingual_Aligner_MockMTWrapper extends Multilingual_Aligner_SentenceAlignments -{ - public function getTranslationInOtherLanguage($source_lng_sentence, $source_lng) { - - if($source_lng == 'en') - $k = 1; - elseif ($source_lng == 'fr') - $k = 0; - - foreach ($this->alignment_table as $key=>$val) { - if ($k==1) { - if ($key==$source_lng_sentence) - return $val; - } else { - if ($val==$source_lng_sentence) - return $key; - } - } - return 'NULL'; - } - -}//class ends +<?php + +require_once 'SentenceAlignments.php'; +require_once 'lib/core/lib/Multilingual/MachineTranslation/GoogleTranslateWrapper.php'; + +class Multilingual_Aligner_MockMTWrapper extends Multilingual_Aligner_SentenceAlignments +{ + public function getTranslationInOtherLanguage($source_lng_sentence, $source_lng) { + + if($source_lng == 'en') + $k = 1; + elseif ($source_lng == 'fr') + $k = 0; + + foreach ($this->alignment_table as $key=>$val) { + if ($k==1) { + if ($key==$source_lng_sentence) + return $val; + } else { + if ($val==$source_lng_sentence) + return $key; + } + } + return 'NULL'; + } + +}//class ends Modified: branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/SentenceAlignments.php =================================================================== --- branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/SentenceAlignments.php 2010-01-15 09:47:23 UTC (rev 24397) +++ branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/SentenceAlignments.php 2010-01-15 09:59:20 UTC (rev 24398) @@ -1,396 +1,396 @@ -<?php - -/* - * Class used to store aligned bilingual sentences for two different - * linguistic versions of a same document. For example, English - * sentences with their corresponding French sentences. - */ -include_once "SentenceSegmentor.php"; - -class Multilingual_Aligner_SentenceAlignments -{ - protected $alignment_table=array(); - protected $l1="en"; - protected $l2="fr"; - public function addSentencePair($lng1_sentence, $lng1, $lng2_sentence, $lng2) - { - if($lng1==$this->l1) - { - $this->alignment_table[$lng1_sentence]=$lng2_sentence; - } - else if($lng2==$this->l1) - { $this->alignment_table[$lng2_sentence]=$lng1_sentence; - } - } - - public function getSentenceInOtherLanguage($source_lng_sentence, $source_lng,$key_value,$sentence_array,$index) - { - $segmentor = new Multilingual_Aligner_SentenceSegmentor(); - - if($source_lng==$this->l1) - $k=1; - else if($source_lng==$this->l2) - $k=0; - foreach($this->alignment_table as $key=>$val) - { - if($k==1) - { $sentences = $segmentor->segment(trim($key)); - { - if(strcmp(trim($sentences[0]),trim($source_lng_sentence))==0)//if one of those is matched - { - $found=1; - for($j=1,$l=1;$j<count($sentences) ;$l++) - {$flag=0; - if(($l +$index)>=count($sentence_array)) - {$found=0; - break; - } - if(strcmp(trim($sentence_array[$index+$l]),trim($sentences[$j]))!=0) - { - if($sentence_array[$index+$l]=="" || $sentence_array[$index+$l][0]!="+") //if it is an added sentence - {$found=0; - break; - }//if - else - $flag=1; - }//if - if($flag==0) - $j++; - }//for - if($found==1) - { - $key_value=$key; - $array=array($key,$val); - return $array; - }//if - }//if - - }//foreach - - }//if $k - else - { - $sentences = $segmentor->segment(trim($val)); - { - if(strcmp(trim($sentences[0]),trim($source_lng_sentence))==0)//if one of those is matched - { - $found=1; - for($j=$i+1,$l=1;$j<count($sentences);$l++) - {$flag=0; - if(($l +$index)>=count($sentence_array)) - {$found=0; - break; - } - - if(strcmp(trim($sentence_array[$index+$l]),trim($sentences[$j]))!=0) - { - if($sentence_array[$index+$l]=="" || $sentence_array[$index+$l][0]!="+") //if it is an added sentence - {$found=0; - break; - }//if - else - $flag=1; - }//if - if($flag==0) - $j++; - }//for - if($found==1) - { - $key_value=$val; - $array=array($val,$key); - return $array; - }//if - }//if - - }//foreach - - - - } //else - - }//foreach - - if($k==1) - { $times=0; - $i=-1; - $temp1="NULL"; - $temp2="NULL"; - $index1=$index; - $start=0; - $value=""; - $found=0; - - foreach($this->alignment_table as $key=>$val) - { $start++; - $sent_ind=0; - $sentences = $segmentor->segment(trim($key)); - for($j=0;$j<count($sentences);$j++) - { - $sentences[$j]=trim($sentences[$j]); - } - - - while(1) - {$found=0; - - if($temp1=="NULL" && $sent_ind<count($sentences)) - {$temp1=$sentences[$sent_ind]; - $sent_ind++; - } - if($temp2=="NULL") - {$temp2=$source_lng_sentence; - $index1; - } - $temp1=trim($temp1); - $temp2=trim($temp2); - - if(($c=$this->strpos_function($temp1,$temp2))!=-1 && $c ==0) - { $found=1; - if(strlen($temp1)==strlen($temp2) && $sent_ind==count($sentences)) - {for($u=0;$u<$start;$u++)//return key and val - {prev($this->alignment_table); - } - $d=key($this->alignment_table); - $key_value=$key_value.$d; - $value=$value.current($this->alignment_table); - for($u=0;$u<$start-1;$u++)//return key and val - { next($this->alignment_table); - $d=key($this->alignment_table); - - $key_value=$key_value.$d; - $value=$value.current($this->alignment_table); - - } - $array=array($key_value,$value,$dummy); - $start=0; - return $array; - } - $temp1 = substr($temp1,strlen($temp2)); - if($temp1=="") - $temp1="NULL"; - while(($index1+1)<count($sentence_array)) - {if($sentence_array[$index1+1]=="" || $sentence_array[$index1+1][0]!="+") - { - $temp2=$sentence_array[$index1+1]; - $index1++; - break; - }//if - $index1++; - }//while - continue; - - }//if strpos_function($sentence[0],$source_lng_sentence) - else if(($c=$this->strpos_function($temp2,$temp1))!=-1 && $c ==0) - { - - $found=1; - if(strlen($temp1) == strlen($temp2) && $sent_ind==count($sentences)) - { - for($u=0;$u<$start;$u++)//return key and val - { - prev($this->alignment_table); - } - $d=key($this->alignment_table); - - $key_value=$key_value.$d; - $value=$value.current($this->alignment_table); - for($u=0;$u<$start-1;$u++)//return key and val - { - next($this->alignment_table); - $key_value=$key_value.key($this->alignment_table); - $value=$value.current($this->alignment_table); - - } - $array=array($key_value,$value,$dummy); - $start=0; - return $array; - }//if equal - - $temp2=substr($temp2,strlen($temp1)); - if($sent_ind>=count($sentences)) - { - $temp1="NULL"; - break; - } - else - { - $temp1=$sentences[$sent_ind]; - $sent_ind++; - } - }//if strpos_function($sentence[0],$source_lng_sentence) - if($found==0) - { - $start=0; - $value=""; - break; - }//if - }//while - if($found==0) - { - $temp1="NULL"; - $temp2="NULL"; - $index1=$index; - - } - }//foreach - - }//if $k - else - { $times=0; - $i=-1; - $temp1="NULL"; - $temp2="NULL"; - $index1=$index; - $start=0; - $value=""; - $found=0; - - foreach($this->alignment_table as $key=>$val) - { $start++; - $sent_ind=0; - $sentences = $segmentor->segment(trim($val)); - for($j=0;$j<count($sentences);$j++) - { - $sentences[$j]=trim($sentences[$j]); - } - - - while(1) - {$found=0; - - if($temp1=="NULL" && $sent_ind<count($sentences)) - {$temp1=$sentences[$sent_ind]; - $sent_ind++; - } - if($temp2=="NULL") - {$temp2=$source_lng_sentence; - $index1; - } - $temp1=trim($temp1); - $temp2=trim($temp2); - - if(($c=$this->strpos_function($temp1,$temp2))!=-1 && $c ==0) - { $found=1; - if(strlen($temp1)==strlen($temp2) && $sent_ind==count($sentences)) - {for($u=0;$u<$start;$u++)//return key and val - {prev($this->alignment_table); - } - $d=current($this->alignment_table); - $key_value=$key_value.$d; - $value=$value.key($this->alignment_table); - for($u=0;$u<$start-1;$u++)//return key and val - {next($this->alignment_table); - $d=current($this->alignment_table); - - $key_value=$key_value.$d; - $value=$value.key($this->alignment_table); - - } - $array=array($key_value,$value,$dummy); - $start=0; - return $array; - } - $temp1 = substr($temp1,strlen($temp2)); - if($temp1=="") - $temp1="NULL"; - while(($index1+1)<count($sentence_array)) - {if($sentence_array[$index1+1]=="" || $sentence_array[$index1+1][0]!="+") - { - $temp2=$sentence_array[$index1+1]; - $index1++; - break; - }//if - $index1++; - }//while - continue; - - }//if strpos_function($sentence[0],$source_lng_sentence) - else if(($c=$this->strpos_function($temp2,$temp1))!=-1 && $c ==0) - { - $found=1; - if(strlen($temp1) == strlen($temp2) && $sent_ind==count($sentences)) - { - for($u=0;$u<$start;$u++)//return key and val - { - prev($this->alignment_table); - } - $d=current($this->alignment_table); - - $key_value=$key_value.$d; - $value=$value.key($this->alignment_table); - for($u=0;$u<$start-1;$u++)//return key and val - { - next($this->alignment_table); - $key_value=$key_value.current($this->alignment_table); - $value=$value.current($this->alignment_table); - - } - $array=array($key_value,$value,$dummy); - $start=0; - return $array; - }//if equal - - $temp2=substr($temp2,strlen($temp1)); - if($sent_ind>=count($sentences)) - { - $temp1="NULL"; - break; - } - else - { - $temp1=$sentences[$sent_ind]; - $sent_ind++; - } - }//if strpos_function($sentence[0],$source_lng_sentence) - if($found==0) - {$start=0; - $value=""; - break; - }//if - }//while - if($found==0) - { - $temp1="NULL"; - $temp2="NULL"; - $index1=$index; - - } - }//foreach - - }//else - - $array=array("","NULL"); - return $array; - - } - - public function display_alignment_table() - { - echo "in func display<br/>"; - foreach($this->alignment_table as $key=>$val) { - echo "<-->".$key."<--->".$val."<--><br/>"; - } - } - - public function strpos_function($string,$pat) - { - if(strlen($string)==0 && strlen($pat)==0) - return 0; - else if(strlen($string)==0 ||strlen($pat)==0) - return -1; - $start=0; - $lasts=strlen($string)-1; - $lastp=strlen($pat)-1; - $endmatch=$lastp; - $j=0; - for($i=0;$endmatch<=$lasts;$endmatch++,$start++) - { - if($string[$endmatch]==$pat[$lastp]) - {for($j=0,$i=$start;$j<$lastp && $string[$i]==$pat[$j];$i++,$j++); - }//for $j - if($j==$lastp) - return $start; - }//for $i - return -1; - }//function -} +<?php + +/* + * Class used to store aligned bilingual sentences for two different + * linguistic versions of a same document. For example, English + * sentences with their corresponding French sentences. + */ +include_once "SentenceSegmentor.php"; + +class Multilingual_Aligner_SentenceAlignments +{ + protected $alignment_table=array(); + protected $l1="en"; + protected $l2="fr"; + public function addSentencePair($lng1_sentence, $lng1, $lng2_sentence, $lng2) + { + if($lng1==$this->l1) + { + $this->alignment_table[$lng1_sentence]=$lng2_sentence; + } + else if($lng2==$this->l1) + { $this->alignment_table[$lng2_sentence]=$lng1_sentence; + } + } + + public function getSentenceInOtherLanguage($source_lng_sentence, $source_lng,$key_value,$sentence_array,$index) + { + $segmentor = new Multilingual_Aligner_SentenceSegmentor(); + + if($source_lng==$this->l1) + $k=1; + else if($source_lng==$this->l2) + $k=0; + foreach($this->alignment_table as $key=>$val) + { + if($k==1) + { $sentences = $segmentor->segment(trim($key)); + { + if(strcmp(trim($sentences[0]),trim($source_lng_sentence))==0)//if one of those is matched + { + $found=1; + for($j=1,$l=1;$j<count($sentences) ;$l++) + {$flag=0; + if(($l +$index)>=count($sentence_array)) + {$found=0; + break; + } + if(strcmp(trim($sentence_array[$index+$l]),trim($sentences[$j]))!=0) + { + if($sentence_array[$index+$l]=="" || $sentence_array[$index+$l][0]!="+") //if it is an added sentence + {$found=0; + break; + }//if + else + $flag=1; + }//if + if($flag==0) + $j++; + }//for + if($found==1) + { + $key_value=$key; + $array=array($key,$val); + return $array; + }//if + }//if + + }//foreach + + }//if $k + else + { + $sentences = $segmentor->segment(trim($val)); + { + if(strcmp(trim($sentences[0]),trim($source_lng_sentence))==0)//if one of those is matched + { + $found=1; + for($j=$i+1,$l=1;$j<count($sentences);$l++) + {$flag=0; + if(($l +$index)>=count($sentence_array)) + {$found=0; + break; + } + + if(strcmp(trim($sentence_array[$index+$l]),trim($sentences[$j]))!=0) + { + if($sentence_array[$index+$l]=="" || $sentence_array[$index+$l][0]!="+") //if it is an added sentence + {$found=0; + break; + }//if + else + $flag=1; + }//if + if($flag==0) + $j++; + }//for + if($found==1) + { + $key_value=$val; + $array=array($val,$key); + return $array; + }//if + }//if + + }//foreach + + + + } //else + + }//foreach + + if($k==1) + { $times=0; + $i=-1; + $temp1="NULL"; + $temp2="NULL"; + $index1=$index; + $start=0; + $value=""; + $found=0; + + foreach($this->alignment_table as $key=>$val) + { $start++; + $sent_ind=0; + $sentences = $segmentor->segment(trim($key)); + for($j=0;$j<count($sentences);$j++) + { + $sentences[$j]=trim($sentences[$j]); + } + + + while(1) + {$found=0; + + if($temp1=="NULL" && $sent_ind<count($sentences)) + {$temp1=$sentences[$sent_ind]; + $sent_ind++; + } + if($temp2=="NULL") + {$temp2=$source_lng_sentence; + $index1; + } + $temp1=trim($temp1); + $temp2=trim($temp2); + + if(($c=$this->strpos_function($temp1,$temp2))!=-1 && $c ==0) + { $found=1; + if(strlen($temp1)==strlen($temp2) && $sent_ind==count($sentences)) + {for($u=0;$u<$start;$u++)//return key and val + {prev($this->alignment_table); + } + $d=key($this->alignment_table); + $key_value=$key_value.$d; + $value=$value.current($this->alignment_table); + for($u=0;$u<$start-1;$u++)//return key and val + { next($this->alignment_table); + $d=key($this->alignment_table); + + $key_value=$key_value.$d; + $value=$value.current($this->alignment_table); + + } + $array=array($key_value,$value,$dummy); + $start=0; + return $array; + } + $temp1 = substr($temp1,strlen($temp2)); + if($temp1=="") + $temp1="NULL"; + while(($index1+1)<count($sentence_array)) + {if($sentence_array[$index1+1]=="" || $sentence_array[$index1+1][0]!="+") + { + $temp2=$sentence_array[$index1+1]; + $index1++; + break; + }//if + $index1++; + }//while + continue; + + }//if strpos_function($sentence[0],$source_lng_sentence) + else if(($c=$this->strpos_function($temp2,$temp1))!=-1 && $c ==0) + { + + $found=1; + if(strlen($temp1) == strlen($temp2) && $sent_ind==count($sentences)) + { + for($u=0;$u<$start;$u++)//return key and val + { + prev($this->alignment_table); + } + $d=key($this->alignment_table); + + $key_value=$key_value.$d; + $value=$value.current($this->alignment_table); + for($u=0;$u<$start-1;$u++)//return key and val + { + next($this->alignment_table); + $key_value=$key_value.key($this->alignment_table); + $value=$value.current($this->alignment_table); + + } + $array=array($key_value,$value,$dummy); + $start=0; + return $array; + }//if equal + + $temp2=substr($temp2,strlen($temp1)); + if($sent_ind>=count($sentences)) + { + $temp1="NULL"; + break; + } + else + { + $temp1=$sentences[$sent_ind]; + $sent_ind++; + } + }//if strpos_function($sentence[0],$source_lng_sentence) + if($found==0) + { + $start=0; + $value=""; + break; + }//if + }//while + if($found==0) + { + $temp1="NULL"; + $temp2="NULL"; + $index1=$index; + + } + }//foreach + + }//if $k + else + { $times=0; + $i=-1; + $temp1="NULL"; + $temp2="NULL"; + $index1=$index; + $start=0; + $value=""; + $found=0; + + foreach($this->alignment_table as $key=>$val) + { $start++; + $sent_ind=0; + $sentences = $segmentor->segment(trim($val)); + for($j=0;$j<count($sentences);$j++) + { + $sentences[$j]=trim($sentences[$j]); + } + + + while(1) + {$found=0; + + if($temp1=="NULL" && $sent_ind<count($sentences)) + {$temp1=$sentences[$sent_ind]; + $sent_ind++; + } + if($temp2=="NULL") + {$temp2=$source_lng_sentence; + $index1; + } + $temp1=trim($temp1); + $temp2=trim($temp2); + + if(($c=$this->strpos_function($temp1,$temp2))!=-1 && $c ==0) + { $found=1; + if(strlen($temp1)==strlen($temp2) && $sent_ind==count($sentences)) + {for($u=0;$u<$start;$u++)//return key and val + {prev($this->alignment_table); + } + $d=current($this->alignment_table); + $key_value=$key_value.$d; + $value=$value.key($this->alignment_table); + for($u=0;$u<$start-1;$u++)//return key and val + {next($this->alignment_table); + $d=current($this->alignment_table); + + $key_value=$key_value.$d; + $value=$value.key($this->alignment_table); + + } + $array=array($key_value,$value,$dummy); + $start=0; + return $array; + } + $temp1 = substr($temp1,strlen($temp2)); + if($temp1=="") + $temp1="NULL"; + while(($index1+1)<count($sentence_array)) + {if($sentence_array[$index1+1]=="" || $sentence_array[$index1+1][0]!="+") + { + $temp2=$sentence_array[$index1+1]; + $index1++; + break; + }//if + $index1++; + }//while + continue; + + }//if strpos_function($sentence[0],$source_lng_sentence) + else if(($c=$this->strpos_function($temp2,$temp1))!=-1 && $c ==0) + { + $found=1; + if(strlen($temp1) == strlen($temp2) && $sent_ind==count($sentences)) + { + for($u=0;$u<$start;$u++)//return key and val + { + prev($this->alignment_table); + } + $d=current($this->alignment_table); + + $key_value=$key_value.$d; + $value=$value.key($this->alignment_table); + for($u=0;$u<$start-1;$u++)//return key and val + { + next($this->alignment_table); + $key_value=$key_value.current($this->alignment_table); + $value=$value.current($this->alignment_table); + + } + $array=array($key_value,$value,$dummy); + $start=0; + return $array; + }//if equal + + $temp2=substr($temp2,strlen($temp1)); + if($sent_ind>=count($sentences)) + { + $temp1="NULL"; + break; + } + else + { + $temp1=$sentences[$sent_ind]; + $sent_ind++; + } + }//if strpos_function($sentence[0],$source_lng_sentence) + if($found==0) + {$start=0; + $value=""; + break; + }//if + }//while + if($found==0) + { + $temp1="NULL"; + $temp2="NULL"; + $index1=$index; + + } + }//foreach + + }//else + + $array=array("","NULL"); + return $array; + + } + + public function display_alignment_table() + { + echo "in func display<br/>"; + foreach($this->alignment_table as $key=>$val) { + echo "<-->".$key."<--->".$val."<--><br/>"; + } + } + + public function strpos_function($string,$pat) + { + if(strlen($string)==0 && strlen($pat)==0) + return 0; + else if(strlen($string)==0 ||strlen($pat)==0) + return -1; + $start=0; + $lasts=strlen($string)-1; + $lastp=strlen($pat)-1; + $endmatch=$lastp; + $j=0; + for($i=0;$endmatch<=$lasts;$endmatch++,$start++) + { + if($string[$endmatch]==$pat[$lastp]) + {for($j=0,$i=$start;$j<$lastp && $string[$i]==$pat[$j];$i++,$j++); + }//for $j + if($j==$lastp) + return $start; + }//for $i + return -1; + }//function +} Modified: branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/UpdatePages.php =================================================================== --- branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/UpdatePages.php 2010-01-15 09:47:23 UTC (rev 24397) +++ branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/UpdatePages.php 2010-01-15 09:59:20 UTC (rev 24398) @@ -1,242 +1,242 @@ -<?php - include_once "UpdateSentences.php"; - include_once "SentenceSegmentor.php"; - include_once "SentenceAlignments.php"; - include_once "MockMTWrapper.php"; - - class Multilingual_Aligner_UpdatePages - { - public function setUp() { - $this->alignments = new Multilingual_Aligner_SentenceAlignments(); - $this->translator=new Multilingual_Aligner_MockMTWrapper(); - - } - //protected $alignments= new Multilingual_Aligner_SentenceAlignments(); - - - public function UpdatingTargetPage($source_outofdate,$source_modified,$target_outofdate,$target_modified,$source_lng,$target_lng) - { - $segmentor = new Multilingual_Aligner_SentenceSegmentor(); - - $source_outofdate_string =$source_outofdate; - $source_modified_string = $source_modified; - - $target_outofdate_string = $target_outofdate; - $target_modified_string = $target_modified; - - $source_outofdate_sentences = $segmentor->segment($source_outofdate_string); - $source_modified_sentences = $segmentor->segment($source_modified_string); - - $target_outofdate_sentences = $segmentor->segment($target_outofdate_string); - $target_modified_sentences = $segmentor->segment($target_modified_string); - $target_modified_sentences[count($target_modified_sentences)]="dummy"; - - - $i=-1; - - - for($a=0;$a<count($source_outofdate_sentences);$a++) - { - $source_outofdate_sentences[$a]=trim($source_outofdate_sentences[$a]); - - } - for($a=0;$a<count($source_modified_sentences);$a++) - { - $source_modified_sentences[$a]=trim($source_modified_sentences[$a]); - } - - for($a=0;$a<count($target_outofdate_sentences);$a++) - {//$target_outofdate_sentences[$a]=utf8_decode($target_outofdate_sentences[$a]); - - $target_outofdate_sentences[$a]=trim($target_outofdate_sentences[$a]); - } - - for($a=0;$a<count($target_modified_sentences);$a++) - {//$target_modified_sentences[$a]=utf8_decode($target_modified_sentences[$a]); - - $target_modified_sentences[$a]=trim($target_modified_sentences[$a]); - } - - $update=new Multilingual_Aligner_UpdateSentences1(); - $source_diff=$update->DifferencebetweenOriginalFileandModifiedFile($source_outofdate_sentences,$source_modified_sentences,$this->alignments,$this->translator,"en","fr",1); - $target_diff=$update->DifferencebetweenOriginalFileandModifiedFile($target_outofdate_sentences,$target_modified_sentences,$this->alignments,$this->translator,"en","fr",0); - - for($a=0;$a<count($source_diff);$a++) - { - $source_diff[$a]=trim($source_diff[$a]); - } - for($a=0;$a<count($target_diff);$a++) - { - $target_diff[$a]=trim($target_diff[$a]); - if($target_diff[$a]=="+dummy") - unset($target_diff[$a]); - } - $target_diff=array_values($target_diff); - for($a=0;$a<count($source_diff);$a++) - {$index=0; - $i=2; - if($source_diff[$a]=="" || ($source_diff[$a][0]!="+")) - { - $pat=0; - if($source_diff[$a]!="" && $source_diff[$a][0]=="<" && is_numeric($source_diff[$a][1])) - {$b=2; - - while(is_numeric($source_diff[$a][$b])) - { - $b++; - } - - if($source_diff[$a][$b]==">") - $pat=1; - } - if( $source_diff[$a]==""||$pat==0) - { $index=array_search($source_diff[$a],$source_diff); - $index++; - $first=0; - while(($c=$this->array_search_function($source_diff[$a],$source_diff,$index))!=-1) - {$first=1; - - - $source_diff[$c]="<$i>".$source_diff[$c]; - $i++; - $index=$c+1; - }//while - { - $source_diff[$a]="<1>".$source_diff[$a]; - - } - }//if - }//if - }//for - - for($a=0;$a<count($target_diff);$a++) - {$index=1; - $i=2; - if($target_diff[$a]=="" || ($target_diff[$a][0]!="+")) - { - $pat=0; - if($target_diff[$a]!="" && $target_diff[$a][0]=="<" && is_numeric($target_diff[$a][1])) - {$b=2; - while(is_numeric($target_diff[$a][$b])) - { - $b++; - } - if($target_diff[$a][$b]==">") - $pat=1; - } - if( $target_diff[$a]==""||$pat==0) - { $index=array_search($target_diff[$a],$target_diff); - $index++; - $first=0; - while(($c=$this->array_search_function($target_diff[$a],$target_diff,$index))!=-1) - {$first=1; - - - $target_diff[$c]="<$i>".$target_diff[$c]; - $i++; - $index=$c+1; - - }//while - //if($first==0) - { - $target_diff[$a]="<1>".$target_diff[$a]; - - } - }//if - }//if - }//for - - - $final_updated=$update->FinalUpdatedFileinTagetLanguage($source_diff,$target_diff); - /* - echo "final_updated_sentences<br/>"; - foreach($final_updated as $item) - echo "sentence-> ".$item."<br/>"; - $content = implode(' ',$final_updated); - echo "$content<br/>"; - if(strcmp("Firefox supporte les caract\xE8res internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien. C'est une d\xE9claration d'essai. La plupart des sites qui ont besoin de polices suppl\xE9mentaires vont avoir une page qui d\xE9crit o\xF9 vous pouvez obtenir la police.", $content)==0) - echo "match<br/>"; - else - echo "fail"; - */ - - return $final_updated; - - }//function ends - - public function SetAlignment($source_file,$target_file,$source_lng,$target_lng) - { - $source_array=explode("<br/>",$source_file); - $target_array=explode("<br/>",$target_file); - for($i=1;$i<count($target_array);$i++) - { //$target_array[$i]=utf8_decode($target_array[$i]); - } - - for($i=0;$i<count($source_array);$i++) - { - $this->alignments->addSentencePair($source_array[$i],$source_lng,$target_array[$i],$target_lng); - } - }//function ends - - - public function SetMT($source_file,$target_file,$source_lng,$target_lng) - { - $source_array=explode("<br/>",$source_file); - $target_array=explode("<br/>",$target_file); - - for($i=0;$i<count($target_array);$i++) - { $target_array[$i]=trim($target_array[$i]); - // $target_array[$i]=utf8_decode($target_array[$i]); - - } - for($i=0;$i<count($source_array);$i++) - { - $source_array[$i]=trim($source_array[$i]); - } - for($i=0;$i<count($source_array);$i++) - { - $this->translator->addSentencePair($source_array[$i],$source_lng,$target_array[$i],$target_lng); - } - }//function ends - - - - public function array_search_function($temp,$array,$index) - {$i=0; - for($i=$index;$i<count($array);$i++) - { - $val=$array[$i]; - if(strcmp($temp,$val)==0) - { - return $i; - } - - } - return -1; - } - - - }//class ends - - // algorithm can be checked by running through these instructions - /* - $source_lng="en"; - $target_lng="fr"; - $source_outofdate="Firefox supports international characters for languages such as Hindi. You can test your Firefoxs support of Hindi scripts at BBC Hindi.Most sites that require additional fonts will have a page describing where you can get the font."; - $source_modified="Firefox supports international characters for languages such as Hindi. You can test your Firefoxs support of Hindi scripts at BBC Hindi.Most sites that require additional fonts will have a page describing where you can get the font."; - $target_outofdate="Firefox supporte les caract\xE8res internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.La plupart des sites qui ont besoin de polices suppl\xE9mentaires vont avoir une page qui d\xE9crit o\xF9 vous pouvez obtenir la police."; - $target_modified="Firefox supporte les caract\xE8res internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien. C'est une d\xE9claration d'essai.La plupart des sites qui ont besoin de polices suppl\xE9mentaires vont avoir une page qui d\xE9crit o\xF9 vous pouvez obtenir la police."; - $source_alignment="Firefox supports international characters for languages such as Hindi.<br/>You can test your Firefoxs support of Hindi scripts at BBC Hindi.<br/>Most sites that require additional fonts will have a page describing where you can get the font."; - $target_alignment="Firefox supporte les caract\xE8res internationaux pour des langues tel que lindien.<br/>Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.<br/>La plupart des sites qui ont besoin de polices suppl\xE9mentaires vont avoir une page qui d\xE9crit o\xF9 vous pouvez obtenir la police."; - - $source_Mtranslation="This is a test statement."; - $target_Mtranslation="C'est une d\xE9claration d'essai."; - - $test=new Multilingual_Aligner_UpdatePages(); - $test->setUp(); - $test->SetAlignment($source_alignment,$target_alignment,$source_lng,$target_lng); - $test->SetMT($source_Mtranslation,$source_Mtranslation,$source_lng,$target_lng); - $test->UpdatingTargetPage($source_outofdate,$source_modified,$target_outofdate,$target_modified,$source_lng,$target_lng); - */ -?> - +<?php + include_once "UpdateSentences.php"; + include_once "SentenceSegmentor.php"; + include_once "SentenceAlignments.php"; + include_once "MockMTWrapper.php"; + + class Multilingual_Aligner_UpdatePages + { + public function setUp() { + $this->alignments = new Multilingual_Aligner_SentenceAlignments(); + $this->translator=new Multilingual_Aligner_MockMTWrapper(); + + } + //protected $alignments= new Multilingual_Aligner_SentenceAlignments(); + + + public function UpdatingTargetPage($source_outofdate,$source_modified,$target_outofdate,$target_modified,$source_lng,$target_lng) + { + $segmentor = new Multilingual_Aligner_SentenceSegmentor(); + + $source_outofdate_string =$source_outofdate; + $source_modified_string = $source_modified; + + $target_outofdate_string = $target_outofdate; + $target_modified_string = $target_modified; + + $source_outofdate_sentences = $segmentor->segment($source_outofdate_string); + $source_modified_sentences = $segmentor->segment($source_modified_string); + + $target_outofdate_sentences = $segmentor->segment($target_outofdate_string); + $target_modified_sentences = $segmentor->segment($target_modified_string); + $target_modified_sentences[count($target_modified_sentences)]="dummy"; + + + $i=-1; + + + for($a=0;$a<count($source_outofdate_sentences);$a++) + { + $source_outofdate_sentences[$a]=trim($source_outofdate_sentences[$a]); + + } + for($a=0;$a<count($source_modified_sentences);$a++) + { + $source_modified_sentences[$a]=trim($source_modified_sentences[$a]); + } + + for($a=0;$a<count($target_outofdate_sentences);$a++) + {//$target_outofdate_sentences[$a]=utf8_decode($target_outofdate_sentences[$a]); + + $target_outofdate_sentences[$a]=trim($target_outofdate_sentences[$a]); + } + + for($a=0;$a<count($target_modified_sentences);$a++) + {//$target_modified_sentences[$a]=utf8_decode($target_modified_sentences[$a]); + + $target_modified_sentences[$a]=trim($target_modified_sentences[$a]); + } + + $update=new Multilingual_Aligner_UpdateSentences1(); + $source_diff=$update->DifferencebetweenOriginalFileandModifiedFile($source_outofdate_sentences,$source_modified_sentences,$this->alignments,$this->translator,"en","fr",1); + $target_diff=$update->DifferencebetweenOriginalFileandModifiedFile($target_outofdate_sentences,$target_modified_sentences,$this->alignments,$this->translator,"en","fr",0); + + for($a=0;$a<count($source_diff);$a++) + { + $source_diff[$a]=trim($source_diff[$a]); + } + for($a=0;$a<count($target_diff);$a++) + { + $target_diff[$a]=trim($target_diff[$a]); + if($target_diff[$a]=="+dummy") + unset($target_diff[$a]); + } + $target_diff=array_values($target_diff); + for($a=0;$a<count($source_diff);$a++) + {$index=0; + $i=2; + if($source_diff[$a]=="" || ($source_diff[$a][0]!="+")) + { + $pat=0; + if($source_diff[$a]!="" && $source_diff[$a][0]=="<" && is_numeric($source_diff[$a][1])) + {$b=2; + + while(is_numeric($source_diff[$a][$b])) + { + $b++; + } + + if($source_diff[$a][$b]==">") + $pat=1; + } + if( $source_diff[$a]==""||$pat==0) + { $index=array_search($source_diff[$a],$source_diff); + $index++; + $first=0; + while(($c=$this->array_search_function($source_diff[$a],$source_diff,$index))!=-1) + {$first=1; + + + $source_diff[$c]="<$i>".$source_diff[$c]; + $i++; + $index=$c+1; + }//while + { + $source_diff[$a]="<1>".$source_diff[$a]; + + } + }//if + }//if + }//for + + for($a=0;$a<count($target_diff);$a++) + {$index=1; + $i=2; + if($target_diff[$a]=="" || ($target_diff[$a][0]!="+")) + { + $pat=0; + if($target_diff[$a]!="" && $target_diff[$a][0]=="<" && is_numeric($target_diff[$a][1])) + {$b=2; + while(is_numeric($target_diff[$a][$b])) + { + $b++; + } + if($target_diff[$a][$b]==">") + $pat=1; + } + if( $target_diff[$a]==""||$pat==0) + { $index=array_search($target_diff[$a],$target_diff); + $index++; + $first=0; + while(($c=$this->array_search_function($target_diff[$a],$target_diff,$index))!=-1) + {$first=1; + + + $target_diff[$c]="<$i>".$target_diff[$c]; + $i++; + $index=$c+1; + + }//while + //if($first==0) + { + $target_diff[$a]="<1>".$target_diff[$a]; + + } + }//if + }//if + }//for + + + $final_updated=$update->FinalUpdatedFileinTagetLanguage($source_diff,$target_diff); + /* + echo "final_updated_sentences<br/>"; + foreach($final_updated as $item) + echo "sentence-> ".$item."<br/>"; + $content = implode(' ',$final_updated); + echo "$content<br/>"; + if(strcmp("Firefox supporte les caract\xE8res internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien. C'est une d\xE9claration d'essai. La plupart des sites qui ont besoin de polices suppl\xE9mentaires vont avoir une page qui d\xE9crit o\xF9 vous pouvez obtenir la police.", $content)==0) + echo "match<br/>"; + else + echo "fail"; + */ + + return $final_updated; + + }//function ends + + public function SetAlignment($source_file,$target_file,$source_lng,$target_lng) + { + $source_array=explode("<br/>",$source_file); + $target_array=explode("<br/>",$target_file); + for($i=1;$i<count($target_array);$i++) + { //$target_array[$i]=utf8_decode($target_array[$i]); + } + + for($i=0;$i<count($source_array);$i++) + { + $this->alignments->addSentencePair($source_array[$i],$source_lng,$target_array[$i],$target_lng); + } + }//function ends + + + public function SetMT($source_file,$target_file,$source_lng,$target_lng) + { + $source_array=explode("<br/>",$source_file); + $target_array=explode("<br/>",$target_file); + + for($i=0;$i<count($target_array);$i++) + { $target_array[$i]=trim($target_array[$i]); + // $target_array[$i]=utf8_decode($target_array[$i]); + + } + for($i=0;$i<count($source_array);$i++) + { + $source_array[$i]=trim($source_array[$i]); + } + for($i=0;$i<count($source_array);$i++) + { + $this->translator->addSentencePair($source_array[$i],$source_lng,$target_array[$i],$target_lng); + } + }//function ends + + + + public function array_search_function($temp,$array,$index) + {$i=0; + for($i=$index;$i<count($array);$i++) + { + $val=$array[$i]; + if(strcmp($temp,$val)==0) + { + return $i; + } + + } + return -1; + } + + + }//class ends + + // algorithm can be checked by running through these instructions + /* + $source_lng="en"; + $target_lng="fr"; + $source_outofdate="Firefox supports international characters for languages such as Hindi. You can test your Firefoxs support of Hindi scripts at BBC Hindi.Most sites that require additional fonts will have a page describing where you can get the font."; + $source_modified="Firefox supports international characters for languages such as Hindi. You can test your Firefoxs support of Hindi scripts at BBC Hindi.Most sites that require additional fonts will have a page describing where you can get the font."; + $target_outofdate="Firefox supporte les caract\xE8res internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.La plupart des sites qui ont besoin de polices suppl\xE9mentaires vont avoir une page qui d\xE9crit o\xF9 vous pouvez obtenir la police."; + $target_modified="Firefox supporte les caract\xE8res internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien. C'est une d\xE9claration d'essai.La plupart des sites qui ont besoin de polices suppl\xE9mentaires vont avoir une page qui d\xE9crit o\xF9 vous pouvez obtenir la police."; + $source_alignment="Firefox supports international characters for languages such as Hindi.<br/>You can test your Firefoxs support of Hindi scripts at BBC Hindi.<br/>Most sites that require additional fonts will have a page describing where you can get the font."; + $target_alignment="Firefox supporte les caract\xE8res internationaux pour des langues tel que lindien.<br/>Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.<br/>La plupart des sites qui ont besoin de polices suppl\xE9mentaires vont avoir une page qui d\xE9crit o\xF9 vous pouvez obtenir la police."; + + $source_Mtranslation="This is a test statement."; + $target_Mtranslation="C'est une d\xE9claration d'essai."; + + $test=new Multilingual_Aligner_UpdatePages(); + $test->setUp(); + $test->SetAlignment($source_alignment,$target_alignment,$source_lng,$target_lng); + $test->SetMT($source_Mtranslation,$source_Mtranslation,$source_lng,$target_lng); + $test->UpdatingTargetPage($source_outofdate,$source_modified,$target_outofdate,$target_modified,$source_lng,$target_lng); + */ +?> + Modified: branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/UpdateSentences.php =================================================================== --- branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/UpdateSentences.php 2010-01-15 09:47:23 UTC (rev 24397) +++ branches/experimental/tiki-log-merge/lib/core/lib/Multilingual/Aligner/UpdateSentences.php 2010-01-15 09:59:20 UTC (rev 24398) @@ -1,651 +1,651 @@ -<?php - include_once "lib/diff/Diff.php"; - include_once "lib/diff/difflib.php"; - include_once "lib/diff/Renderer.php"; - include_once "lib/diff/renderer_unified.php"; - include_once "SentenceAlignments.php"; - include_once "SentenceSegmentor.php"; - include_once "MockMTWrapper.php"; - - -/* - * Class used to update the modifications done in one version of page to the other version of same page. - */ - -class Multilingual_Aligner_UpdateSentences1 -{ - - //$translation is 1 in case of source modification(H) and 0 in case of target modification(T"), final_diff is carrying end result - public function DifferencebetweenOriginalFileandModifiedFile($unchangedSource_array,$changedSource_array,$alignments,$translator,$source_lng,$target_lng,$translation) - { - $changed_diff_unchanged=array(); - $changedSource_translated=array(); - - $diff = &new Text_Diff($unchangedSource_array,$changedSource_array); - $context=sizeof($unchangedSource_array); - $renderer = &new Text_Diff_Renderer_unified($context); - $arr=$renderer->render($diff); - $k=0; - $body=0; - $del=0; - $add=0; - foreach($arr as $e) - { - if($k!=0) - { - foreach($e as $key=>$val) - { - if($val=="diffbody") - $body=1; - - - if($val=="diffdeleted") - $del=1; - if($val=="diffadded") - $add=1; - - if($key=="data") - { - foreach($val as $item) - { - if($body==1) - $changed_diff_unchanged[]=$item; - if($del==1) - $changed_diff_unchanged[]="-".$item; - if($add==1) - $changed_diff_unchanged[]="+".$item; - - }//foreach - - - $body=0; - $del=0; - $add=0; - - }//if - - }//foreach - - }//if $k - $k=$k+1; - - - }//foreach - //both files are same - if(count($changed_diff_unchanged)==0) - { - - $changed_diff_unchanged=$changedSource_array; - - } - - foreach($changed_diff_unchanged as $val) - {$num=0; - $val=ereg_replace ('<span class="diffchar">' ,"", $val ); - $val=ereg_replace ('</span>' ,"", $val ); - $new=explode("<br />",$val); - foreach($new as $n) - { - if($val!=""&&$val[0]=="-") - { - if($num==0) - $changed_diff_unchanged_new[]=trim($n); - else - $changed_diff_unchanged_new[]="-".trim($n); - - } - else if($val!=""&&$val[0]=="+") - { - if($num==0) - $changed_diff_unchanged_new[]=trim($n); - else - $changed_diff_unchanged_new[]="+".trim($n); - - } - else - $changed_diff_unchanged_new[]=$n; - $num++; - }//foreach - }//foreach - $changed_diff_unchanged=$changed_diff_unchanged_new; - - $i=0; - foreach($changed_diff_unchanged as $value) - { - if(strcmp(substr($value,0,1),"-")==0) //sentence is preceded by '-' - { - $temp="+".substr($value,1); - $match=$this->array_search_function($temp,$changed_diff_unchanged); - if($match!=-1) //sentence is shuffled - { - - $changed_diff_unchanged[$i]="";//eliminating the -ve sentence - $changed_diff_unchanged[$match]=substr($value,1); - - } - else - { - - $changed_diff_unchanged[$i]="*deleted*"; - - } - }//outer if - - if(strcmp(substr($value,0,1),"+")==0) //sentence is preceded by '+' - { - $temp="-".substr($value,1); - $match=$this->array_search_function($temp,$changed_diff_unchanged); - if($match!=-1) //sentence is shuffled - { - - $changed_diff_unchanged[$match]="*deleted*";//eliminating the -ve sentence - $changed_diff_unchanged[$i]=substr($value,1); - - } - else - { - //do nothing--sentence is added - - } - - - - }//outer if - - - $i=$i+1; - }//foreach - - - //Converting sentences in Source Language to Target language - if($translation==1) - { - - $changedSource_translated=$this->changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged,$alignments,$translator,$source_lng,$target_lng); - - $final_diff= $changedSource_translated; - }//if - else - { - $changed_final=array(); - $i=0; - foreach($changed_diff_unchanged as $val) - { - if(strcmp($changed_diff_unchanged[$i],"*deleted*")!=0) - $changed_final[]=trim($changed_diff_unchanged[$i]); - $i=$i+1; - }//foreach - - $final_diff=$changed_final; - }//else - - return $final_diff; - }//function ends - - - //// - - - public function changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged,$alignments,$translator,$source_lng,$target_lng) { - - $segmentor = new Multilingual_Aligner_SentenceSegmentor(); - - $num=0; - - foreach($changed_diff_unchanged as $value) - { - if($value=="*deleted*") - unset($changed_diff_unchanged[$num]); - $num++; - } - $changed_diff_unchanged = array_values($changed_diff_unchanged); - $num=0; - while(count($changed_diff_unchanged )>0) - { $value=$changed_diff_unchanged[0]; - $num++; - { $key_value=""; - - $target_lng_array=$alignments->getSentenceInOtherLanguage($value, $source_lng,$key_value,$changed_diff_unchanged,$this->array_search_function($value,$changed_diff_unchanged)); //as two or more target sentences are being considered as one string, here instead of string arrays should be returned - $key_value=$target_lng_array[0]; - $target_lng_sentence=$target_lng_array[1]; - if(strcmp($target_lng_sentence,"NULL")!=0) - { - $source_sent=$segmentor->segment(trim($key_value)); - $index=$this->array_search_function($value,$changed_diff_unchanged); - $j=0; - - for($i=$index;$i<count($source_sent)+$index+$j;$i++) - {if($changed_diff_unchanged[$i]=="" || $changed_diff_unchanged[$i][0]!="+") - { - unset($changed_diff_unchanged[$i]); - - } - else - $j++; - - } //for - - $sentences=$segmentor->segment(trim($target_lng_sentence)); - foreach($sentences as $item) - { - $changedSource_translated[]=trim($item); - } - - } - else //Machine Translation is required - { - if($value!="" && $value!="+") - { - if($value[0]=="+") - {$temp=substr($value,1); - $translation = $translator->getTranslationInOtherLanguage($temp,$source_lng); - if($translation!="NULL") - {$changedSource_translated[]="+".trim($translation); - }//if !NULL - else - { - - $changedSource_translated[]="+"."no translation is available in french for $temp"; - } - }//if [0]=="+" - - else - { - $translation = $translator->getTranslationInOtherLanguage($value,$source_lng); - if($translation!="NULL") - {$changedSource_translated[]="+".trim($translation); - }//if !NULL - else - $changedSource_translated[]="+"."no translation is available in french for $value"; - - }//else - }//if - else - { - $changedSource_translated[]=$value; - } - $index=$this->array_search_function($value,$changed_diff_unchanged); - unset($changed_diff_unchanged[$index]); - - - } - $changed_diff_unchanged = array_values($changed_diff_unchanged); - - }//if - - } //foreach - return $changedSource_translated; - - - - - }//function ends - - - public function FinalUpdatedFileinTagetLanguage($Souce_Updated_Translated,$Target_Updated) - { - - $diff = &new Text_Diff($Souce_Updated_Translated,$Target_Updated); - $context=sizeof($Souce_Updated_Translated); - - $renderer = &new Text_Diff_Renderer_unified($context); - $arr=$renderer->render($diff); - $k=0; - $body=0; - $del=0; - $add=0; - foreach($arr as $e) - { - if($k!=0) - { - foreach($e as $key=>$val) - { - if($val=="diffbody") - $body=1; - - - if($val=="diffdeleted") - $del=1; - if($val=="diffadded") - $add=1; - - if($key=="data") - { - foreach($val as $item) - { - if($body==1) - $target_diff_source[]=$item; - if($del==1) - $target_diff_source[]="-".$item; - if($add==1) - $target_diff_source[]="+".$item; - }//foreach - - - $body=0; - $del=0; - $add=0; - - }//if - - }//foreach - - }//if $k - $k=$k+1; - - - }//foreach - - $target_diff_source_new=array(); - foreach($target_diff_source as $val) - {$num=0; - $val=ereg_replace ('<span class="diffchar">' ,"", $val ); - $val=ereg_replace ('</span>' ,"", $val ); - $new=explode("<br />",$val); - foreach($new as $n) - { - if($val[0]=="-") - { - if($num==0) - $target_diff_source_new[]=trim($n); - else - $target_diff_source_new[]="-".trim($n); - - } - else if($val[0]=="+") - { - if($num==0) - $target_diff_source_new[]=trim($n); - else - $target_diff_source_new[]="+".trim($n); - - } - else - $target_diff_source_new[]=$n; - $num++; - }//foreach - }//foreach - - //difference over - - //generation of three arrays - $negative_array=array(); - $positive_array=array(); - $normal_array=array(); - $i=-1; - foreach($target_diff_source_new as $value) - {$i++; - - if(strcmp(substr($value,0,1),"-")==0)//sentence starts with '-' - { - $temp="+".substr($value,1); - $match=$this->array_search_function($temp,$target_diff_source_new); - if($match!=-1) - { - if($temp[1]=='+')//if same sentence is being added in to both source and target files - { - $target_diff_source_new[$i]=""; - } - else - { - if(($this->array_search_function(substr($value,1),$normal_array))==-1) - { - $normal_array[]=substr($value,1); - - }//if not present in normal_array - - }//else - }//if match - - else - { - $negative_array[]=$value; - - }//match not found - }//if '-' - - - else if(strcmp(substr($value,0,1),"+")==0)//sentence starts with '+' - { - $temp="-".substr($value,1); - $match=$this->array_search_function($temp,$target_diff_source_new); - if($match!=-1) - { - if($temp[1]=="+")//if same sentence is being added in to both source and target files - { - $positive_array[]=$value; - } - else - { - if($this->array_search_function(substr($value,1),$normal_array)==-1) - { - $normal_array[]=substr($value,1); - - }//if not present in normal_array - - - }//else - }//if match found - else - { - $positive_array[]=$value; - - }//match not found - }//if '+' - else //normal sentence - { - - $normal_array[]=$value; - - - }//normal sentence - - }//foreach - - //generation of three arrays is complete - - //Creating hash table to get the proper location for insertion - $add_beginning=array(); - $sentence_location=array(); - foreach($negative_array as $item) - { - $index=$this->array_search_function($item,$target_diff_source_new); - $get=0; // to check if there is any normal sentence before this negative sentence - for($j=$index-1;$j>=0;$j--) - { - if($get==1) - break; - if($target_diff_source_new[$j][0]=="+" || $target_diff_source_new[$j][0]=="-") - $temp=substr($target_diff_source_new[$j],1); - else - $temp=$target_diff_source_new[$j]; - $search_result=$this->array_search_function($temp,$normal_array); - if($search_result!=-1)//found in normal array - {$found=0; //to chack if already present in hash table - $get=1; ///found a normal sentence before - foreach($sentence_location as $key=>$val) - { - if(strcmp($key,$temp)==0) - { - $found=1; - - $sentence_location[$key][count($sentence_location[$key])]=$item; - - } - - }//foreach - if($found==0) - { $sentence_location[$temp]=array($item); - } - }//if search_result - - //search in positive_array is doubtful - }//for $j - - if($get==0) - { - $add_beginning[]=$item; - } - - }//foreach negative_array - - //sentence_location and add_beginning is complete - - //generation of final updated target file - - foreach($add_beginning as $item) - { - if($item[1]=="+") - { - $finalUpdatedTarget[]="Added_Source ".substr($item,2); - } - else - { - $b=2; - while(is_numeric($item[$b])) - { - $b++; - } - - $finalUpdatedTarget[]="Deleted_Target ".substr($item,$b+1); - - - } - - - - }//foreach - - foreach($Target_Updated as $item) - { - if(($index=$this->array_search_function("+".$item,$positive_array))!=-1) //if present in positive_array - { - if($positive_array[$index]!="+" && $positive_array[$index][1]=='+')//'++' case - $temp=substr($item,1); - else if($positive_array[$index]=="+" || $positive_array[$index][1]!='+') //"+" case - { - //if($item!="") - { - { - // if($item[0]=="<"&&$item[2]==">"&&is_numeric($item[1])) - $b=2; - while(is_numeric($item[$b])) - { - $b++; - } - - $item=substr($item,$b+1); - - } - - }//if - $temp="Deleted_Source ".$item; - } - $finalUpdatedTarget[]=$temp; - - }//if present in positive_array - - else //present in normal arrray - {$item1=$item; - if($item!="") - { - { - if($item[0]=="+")//if same sentence is added at same positions in both source and target - $item1=substr($item,1); - else{ - $b=2; - while(is_numeric($item[$b])) - { - $b++; - } - - $item1=substr($item,$b+1); - }//else - - } - }//if - - $finalUpdatedTarget[]=$item1; - foreach($sentence_location as $key=>$val) - { - if(strcmp($key,$item)==0) - { - foreach($val as $add) - { - if($add!="-" && $add[1]=="+") - { - - $finalUpdatedTarget[]="Added_Source ".substr($add,2); - } - else if($add=="-" || $add[1]!="+" ) - { - - - if($add!="-") - { - - $b=2; - while(is_numeric($add[$b])) - { - $b++; - } - - $add=substr($add,0,1).substr($add,$b+1); - - - - }//if - - $finalUpdatedTarget[]="Deleted_Target ".substr($add,1); - } - - } - break; - }//if - - - }//foreach - } - }//foreach $Target_Updated - return $finalUpdatedTarget; - }//function ends - - public function array_search_function($temp,$array) - { - $i=0; - foreach($array as $val) - { - if(strcmp($temp,$val)==0) - { - return $i; - } - $i++; - } - return -1; - }//function over - - public function strpos_function($string,$pat) - { - if(strlen($string)==0 && strlen($pat)==0) - return 0; - else if(strlen($string)==0 ||strlen($pat)==0) - return -1; - $start=0; - $lasts=strlen($string)-1; - $lastp=strlen($pat)-1; - $endmatch=$lastp; - $j=0; - for($i=0;$endmatch<=$lasts;$endmatch++,$start++) - { - if($string[$endmatch]==$pat[$lastp]) - {for($j=0,$i=$start;$j<$lastp && $string[$i]==$pat[$j];$i++,$j++); - }//for $j - if($j==$lastp) - return $start; - }//for $i - return -1; - }//function over - - -} - +<?php + include_once "lib/diff/Diff.php"; + include_once "lib/diff/difflib.php"; + include_once "lib/diff/Renderer.php"; + include_once "lib/diff/renderer_unified.php"; + include_once "SentenceAlignments.php"; + include_once "SentenceSegmentor.php"; + include_once "MockMTWrapper.php"; + + +/* + * Class used to update the modifications done in one version of page to the other version of same page. + */ + +class Multilingual_Aligner_UpdateSentences1 +{ + + //$translation is 1 in case of source modification(H) and 0 in case of target modification(T"), final_diff is carrying end result + public function DifferencebetweenOriginalFileandModifiedFile($unchangedSource_array,$changedSource_array,$alignments,$translator,$source_lng,$target_lng,$translation) + { + $changed_diff_unchanged=array(); + $changedSource_translated=array(); + + $diff = &new Text_Diff($unchangedSource_array,$changedSource_array); + $context=count($unchangedSource_array); + $renderer = &new Text_Diff_Renderer_unified($context); + $arr=$renderer->render($diff); + $k=0; + $body=0; + $del=0; + $add=0; + foreach($arr as $e) + { + if($k!=0) + { + foreach($e as $key=>$val) + { + if($val=="diffbody") + $body=1; + + + if($val=="diffdeleted") + $del=1; + if($val=="diffadded") + $add=1; + + if($key=="data") + { + foreach($val as $item) + { + if($body==1) + $changed_diff_unchanged[]=$item; + if($del==1) + $changed_diff_unchanged[]="-".$item; + if($add==1) + $changed_diff_unchanged[]="+".$item; + + }//foreach + + + $body=0; + $del=0; + $add=0; + + }//if + + }//foreach + + }//if $k + $k=$k+1; + + + }//foreach + //both files are same + if(count($changed_diff_unchanged)==0) + { + + $changed_diff_unchanged=$changedSource_array; + + } + + foreach($changed_diff_unchanged as $val) + {$num=0; + $val=ereg_replace ('<span class="diffchar">' ,"", $val ); + $val=ereg_replace ('</span>' ,"", $val ); + $new=explode("<br />",$val); + foreach($new as $n) + { + if($val!=""&&$val[0]=="-") + { + if($num==0) + $changed_diff_unchanged_new[]=trim($n); + else + $changed_diff_unchanged_new[]="-".trim($n); + + } + else if($val!=""&&$val[0]=="+") + { + if($num==0) + $changed_diff_unchanged_new[]=trim($n); + else + $changed_diff_unchanged_new[]="+".trim($n); + + } + else + $changed_diff_unchanged_new[]=$n; + $num++; + }//foreach + }//foreach + $changed_diff_unchanged=$changed_diff_unchanged_new; + + $i=0; + foreach($changed_diff_unchanged as $value) + { + if(strcmp(substr($value,0,1),"-")==0) //sentence is preceded by '-' + { + $temp="+".substr($value,1); + $match=$this->array_search_function($temp,$changed_diff_unchanged); + if($match!=-1) //sentence is shuffled + { + + $changed_diff_unchanged[$i]="";//eliminating the -ve sentence + $changed_diff_unchanged[$match]=substr($value,1); + + } + else + { + + $changed_diff_unchanged[$i]="*deleted*"; + + } + }//outer if + + if(strcmp(substr($value,0,1),"+")==0) //sentence is preceded by '+' + { + $temp="-".substr($value,1); + $match=$this->array_search_function($temp,$changed_diff_unchanged); + if($match!=-1) //sentence is shuffled + { + + $changed_diff_unchanged[$match]="*deleted*";//eliminating the -ve sentence + $changed_diff_unchanged[$i]=substr($value,1); + + } + else + { + //do nothing--sentence is added + + } + + + + }//outer if + + + $i=$i+1; + }//foreach + + + //Converting sentences in Source Language to Target language + if($translation==1) + { + + $changedSource_translated=$this->changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged,$alignments,$translator,$source_lng,$target_lng); + + $final_diff= $changedSource_translated; + }//if + else + { + $changed_final=array();... [truncated message content] |