-
Astor Bizard authoredAstor Bizard authored
diff.class.php 20.77 KiB
<?php
// This file is part of VPL for Moodle - http://vpl.dis.ulpgc.es/
//
// VPL for Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// VPL for Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with VPL for Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Class to show two files diff
*
* @package mod_vpl
* @copyright 2012 Juan Carlos Rodríguez-del-Pino
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
* @author Juan Carlos Rodríguez-del-Pino <jcrodriguez@dis.ulpgc.es>
*/
defined('MOODLE_INTERNAL') || die();
require_once(dirname(__FILE__).'/../locallib.php');
require_once(dirname(__FILE__).'/../vpl.class.php');
require_once(dirname(__FILE__).'/../vpl_submission.class.php');
require_once(dirname(__FILE__).'/similarity_factory.class.php');
require_once(dirname(__FILE__).'/similarity_sources.class.php');
class vpl_diff {
/**
* Remove chars and digits
*
* @param $line string
* to process
* @return string without chars and digits
*/
static public function removealphanum($line) {
$ret = '';
$l = strlen( $line );
// Parse line to remove alphanum chars.
for ($i = 0; $i < $l; $i ++) {
$c = $line [$i];
if (! ctype_alnum( $c ) && $c != ' ') {
$ret .= $c;
}
}
return $ret;
}
/**
* Calculate the similarity of two lines
*
* @param
* $line1
* @param
* $line2
* @return int (3 => trimmed equal, 2 =>removealphanum , 1 => start of line , 0 => not equal)
*/
static public function diffline($line1, $line2) {
// TODO Refactor.
// This is a bad solution that must be rebuild to consider diferent languages.
// Compare trimed text.
$line1 = trim( $line1 );
$line2 = trim( $line2 );
if ($line1 == $line2) {
if (strlen( $line1 ) > 0) {
return 3;
} else {
return 1;
}
}
// Compare filtered text (removing alphanum).
$ran1 = self::removealphanum( $line1 );
$limit = strlen( $ran1 );
if ($limit > 0) {
if ($limit > 3) {
$limite = 3;
}
if (strncmp( $ran1, self::removealphanum( $line2 ), $limit ) == 0) {
return 2;
}
}
// Compare start of line.
$l = 4;
if ($l > strlen( $line1 )) {
$l = strlen( $line1 );
}
if ($l > strlen( $line2 )) {
$l = strlen( $line2 );
}
for ($i = 0; $i < $l; ++ $i) {
if ($line1 [$i] != $line2 [$i]) {
break;
}
}
return $i > 0 ? 1 : 0;
}
static public function newlineinfo($type, $ln1, $ln2 = 0) {
$ret = new StdClass();
$ret->type = $type;
$ret->ln1 = $ln1;
$ret->ln2 = $ln2;
return $ret;
}
/**
* Initialize used matrix
*
* @param array $matrix of arrays to initialize
* @param array $prev of arrays to initialize
* @param $nl1 number of rows
* @param $nl2 number of columns
* @return void
*/
static public function initauxiliarmatrices(&$matrix, &$prev, $nl1, $nl2) {
// Set the matrix[0..nl1+1][0..nl2+1] to 0.
$row = array_pad( array (), $nl2 + 1, 0 );
$matrix = array_pad( array (), $nl1 + 1, $row );
// Set the prev matrix [0..nl1+1][0..nl2+1] to 0.
$prev = $matrix;
// Update first column.
for ($i = 0; $i <= $nl1; $i ++) {
$matriz [$i] [0] = 0;
$prev [$i] [0] = - 1;
}
// Update first row.
for ($j = 1; $j <= $nl2; $j ++) {
$matriz [0] [$j] = 0;
$prev [0] [$j] = 1;
}
}
static public function similine($line1, $line2, $pattern) {
return preg_replace($pattern, '', $line1) == preg_replace($pattern, '', $line2);
}
/**
* Calculate diff for two array of lines
*
* @param $lines1 array
* of string
* @param $lines2 array
* of string
* @param $detaileddiff bool
* whether different levels of similarity should be distinguished
* @return array of objects with info to show the two array of lines
*/
static public function calculatediff($lines1, $lines2, $detaileddiff = true) {
$ret = array ();
$nl1 = count( $lines1 );
$nl2 = count( $lines2 );
if ($nl1 == 0 && $nl2 == 0) {
return false;
}
if ($nl1 == 0) { // There is no first file.
foreach ($lines2 as $pos => $line) {
$ret [] = self::newlineinfo( '>', 0, $pos + 1 );
}
return $ret;
}
if ($nl2 == 0) { // There is no second file.
foreach ($lines1 as $pos => $line) {
$ret [] = self::newlineinfo( '<', $pos + 1 );
}
return $ret;
}
self::initauxiliarmatrices( $matrix, $prev, $nl1, $nl2 );
// Matrix processing.
for ($i = 1; $i <= $nl1; $i ++) {
$line = $lines1 [$i - 1];
for ($j = 1; $j <= $nl2; $j ++) {
if ($matrix [$i] [$j - 1] > $matrix [$i - 1] [$j]) {
$max = $matrix [$i] [$j - 1];
$best = 1;
} else {
$max = $matrix [$i - 1] [$j];
$best = - 1;
}
if ($detaileddiff) {
$prize = self::diffline( $line, $lines2 [$j - 1] );
} else {
$prize = $line === $lines2 [$j - 1] ? 1 : 0;
}
if ($matrix [$i - 1] [$j - 1] + $prize >= $max) {
$max = $matrix [$i - 1] [$j - 1] + $prize;
$best = 0;
}
$matrix [$i] [$j] = $max;
$prev [$i] [$j] = $best;
}
}
// Calculate show info.
$limit = $nl1 + $nl2;
$pairs = array ();
$pi = $nl1;
$pj = $nl2;
while ( (! ($pi == 0 && $pj == 0)) && $limit > 0 ) {
$pair = new stdClass();
$pair->i = $pi;
$pair->j = $pj;
$pairs [] = $pair;
$p = $prev [$pi] [$pj];
if ($p == 0) {
$pi --;
$pj --;
} else if ($p == - 1) {
$pi --;
} else if ($p == 1) {
$pj --;
} else {
debbuging('error');
}
$limit --;
}
krsort( $pairs );
$prevpair = new stdClass();
$prevpair->i = 0;
$prevpair->j = 0;
foreach ($pairs as $pair) {
if ($pair->i == $prevpair->i + 1 && $pair->j == $prevpair->j + 1) { // Regular advance.
$l1 = $lines1 [$pair->i - 1];
$l2 = $lines2 [$pair->j - 1];
if ($l1 == $l2) { // Equals.
$ret [] = self::newlineinfo( '=', $pair->i, $pair->j );
} else if ( self::similine($l1, $l2, '/\s/')) {
$ret [] = self::newlineinfo( '1', $pair->i, $pair->j );
} else if ( self::similine($l1, $l2, '/(\s|[0-9]|[a-z])/i')) {
$ret [] = self::newlineinfo( '2', $pair->i, $pair->j );
} else {
$ret [] = self::newlineinfo( '#', $pair->i, $pair->j );
}
} else if ($pair->i == $prevpair->i + 1) { // Removed next line.
$ret [] = self::newlineinfo( '<', $pair->i, false );
} else if ($pair->j == $prevpair->j + 1) { // Added one line.
$ret [] = self::newlineinfo( '>', false, $pair->j );
} else {
debugging( "Internal error " . s( $pair ) . " " . s( $prevpair) );
}
$prevpair = $pair;
}
return $ret;
}
static public function show($filename1, $data1, $htmlheader1, $filename2, $data2, $htmlheader2) {
// Get file lines.
$nl = vpl_detect_newline( $data1 );
$lines1 = explode( $nl, $data1 );
$nl = vpl_detect_newline( $data2 );
$lines2 = explode( $nl, $data2 );
// Get dif as an array of info.
$diff = self::calculatediff( $lines1, $lines2 );
if ($diff === false) {
return;
}
$separator = array (
'<' => ' <<< ',
'>' => ' >>> ',
'=' => ' === ',
'1' => ' ==# ',
'2' => ' =## ',
'#' => ' ### '
);
$emptyline = "\n";
$data1 = '';
$data2 = '';
$datal1 = '';
$datal2 = '';
$diffl = '';
$lines1 [- 1] = '';
$lines2 [- 1] = '';
foreach ($diff as $line) {
$diffl .= $separator [$line->type] . "\n";
if ($line->ln1) {
$datal1 .= sprintf("%4d\n", $line->ln1);
$data1 .= $lines1 [$line->ln1 - 1] . "\n";
} else {
if ( $data1 == '' ) {
$data1 .= $emptyline;
$datal1 .= $emptyline;
}
$data1 .= $emptyline;
$datal1 .= $emptyline;
}
if ($line->ln2) {
$datal2 .= sprintf("%4d\n", $line->ln2);
$data2 .= $lines2 [$line->ln2 - 1] . "\n";
} else {
if ( $data2 == '' ) {
$data2 .= $emptyline;
$datal2 .= $emptyline;
}
$data2 .= $emptyline;
$datal2 .= $emptyline;
}
}
echo '<div style="width: 100%;min-width: 950px; overflow: auto">';
// Header.
echo '<div style="float:left; width: 445px">';
echo $htmlheader1;
echo '</div>';
echo '<div style="float:left; width: 445px">';
echo $htmlheader2;
echo '</div>';
echo '<div style="clear:both;"></div>';
// Files.
$pre = '<pre clas="vpl_g">';
echo '<div style="float:left; text-align: right; width: 3em">';
$shower = vpl_sh_factory::get_sh( 'a.txt' );
$shower->print_file( 'a.txt', $datal1, false, count($diff) + 1, false );
echo '</div>';
echo '<div style="float:left; width: 390px; overflow:auto">';
$shower = vpl_sh_factory::get_sh( $filename1 );
$shower->print_file( $filename1, $data1, false, count($diff) + 1, false );
echo '</div>';
echo '<div style="float:left; width: 3em"">';
$shower = vpl_sh_factory::get_sh( 'b.txt' );
$shower->print_file( 'b.txt', $diffl, false, count($diff) + 1, false );
echo '</div>';
echo '<div style="float:left; text-align: right; width: 3em"">';
$shower = vpl_sh_factory::get_sh( 'b.txt' );
$shower->print_file( 'b.txt', $datal2, false, count($diff) + 1, false );
echo '</div>';
echo '<div style="float:left; width: 390px; overflow:auto">';
$shower = vpl_sh_factory::get_sh( $filename2 );
$shower->print_file( $filename2, $data2, false, count($diff) + 1, false );
echo '</div>';
echo '</div>';
echo '<div style="clear:both;"></div>';
vpl_sh_factory::syntaxhighlight();
}
static public function vpl_get_similfile($f, &$htmlheader, &$filename, &$data) {
global $DB;
$htmlheader = '';
$filename = '';
$data = '';
$type = required_param( 'type' . $f, PARAM_INT );
if ($type == 1) {
$subid = required_param( 'subid' . $f, PARAM_INT );
$filename = required_param( 'filename' . $f, PARAM_TEXT );
$subinstance = $DB->get_record( 'vpl_submissions', array (
'id' => $subid
) );
if ($subinstance !== false) {
$vpl = new mod_vpl( false, $subinstance->vpl );
$vpl->require_capability( VPL_SIMILARITY_CAPABILITY );
$submission = new mod_vpl_submission( $vpl, $subinstance );
$user = $DB->get_record( 'user', array (
'id' => $subinstance->userid
) );
if ($user) {
$link = vpl_mod_href( '/forms/submissionview.php', 'id', $vpl->get_course_module()->id
, 'userid', $subinstance->userid );
$htmlheader .= '<a href="' . $link . '">';
}
$htmlheader .= s( $filename ) . ' ';
if ($user) {
$htmlheader .= '</a>';
$htmlheader .= $vpl->user_fullname_picture( $user );
}
$fg = $submission->get_submitted_fgm();
$data = $fg->getFileData( $filename );
\mod_vpl\event\vpl_diff_viewed::log( $submission );
}
} else if ($type == 3) {
global $CFG;
$data = '';
$vplid = required_param( 'vplid' . $f, PARAM_INT );
$vpl = new mod_vpl( false, $vplid );
$vpl->require_capability( VPL_SIMILARITY_CAPABILITY );
$zipname = required_param( 'zipfile' . $f, PARAM_RAW );
$filename = required_param( 'filename' . $f, PARAM_RAW );
$htmlheader .= $filename . ' ' . optional_param( 'username' . $f, '', PARAM_TEXT );
$ext = strtoupper( pathinfo( $zipname, PATHINFO_EXTENSION ) );
if ($ext != 'ZIP') {
print_error( 'nozipfile' );
}
$zip = new ZipArchive();
$zipfilename = vpl_similarity_preprocess::get_zip_filepath( $vplid, $zipname );
if ($zip->open( $zipfilename ) === true) {
$data = $zip->getFromName( $filename );
$zip->close();
}
} else {
print_error( 'type error' );
}
}
/**
* Computes diff between two lines.
* @param string $line1
* @param string $line2
* @return int The diff in number of chars.
*/
static public function compute_linediff($line1, $line2) {
$n1 = strlen($line1);
$n2 = strlen($line2);
if ($n1 == 0 || $n2 == 0) {
return $n1 + $n2;
}
$queue = array_fill(0, $n1 + $n2 + 1, array());
$done = array_fill(0, $n1 + 1, array());
$z = new stdClass();
$z->i1 = 0;
$z->i2 = 0;
$z->d = 0;
$queue[0][] = $z;
$priority = 0;
// A-star search of shortest diff.
while (true) {
while (count($queue[$priority]) == 0) {
$priority++;
}
$z = array_pop($queue[$priority]);
$i1 = $z->i1;
$i2 = $z->i2;
if (isset($done[$i1][$i2])) {
continue;
} else {
$done[$i1][$i2] = true;
}
// Identical substring: 0 distance.
while ($i1 < $n1 && $i2 < $n2 && $line1{$i1} == $line2{$i2}) {
$i1++;
$i2++;
$done[$i1][$i2] = true;
}
if ($i1 == $n1 && $i2 == $n2) {
// Found shortest diff.
return $z->d;
}
// Character addition in line1.
if ($i1 < $n1 && !isset($done[$i1 + 1][$i2])) {
$z1 = new stdClass();
$z1->i1 = $i1 + 1;
$z1->i2 = $i2;
$z1->d = $z->d + 1;
$queue[$z1->d + abs(($n1 - $z1->i1) - ($n2 - $z1->i2))][] = $z1;
}
// Character addition in line2.
if ($i2 < $n2 && !isset($done[$i1][$i2 + 1])) {
$z2 = new stdClass();
$z2->i1 = $i1;
$z2->i2 = $i2 + 1;
$z2->d = $z->d + 1;
$queue[$z2->d + abs(($n1 - $z2->i1) - ($n2 - $z2->i2))][] = $z2;
}
// Character change.
if ($i1 < $n1 && $i2 < $n2 && !isset($done[$i1 + 1][$i2 + 1])) {
$z3 = new stdClass();
$z3->i1 = $i1 + 1;
$z3->i2 = $i2 + 1;
$z3->d = $z->d + 1;
$queue[$z3->d + abs(($n1 - $z3->i1) - ($n2 - $z3->i2))][] = $z3;
}
}
}
/**
* Computes diff between two files.
* @param string $filedata1
* @param string $filedata2
* @param int $timelimit The maximum amount of time to spend on this computation (in milliseconds).
* If provided and the time limit is exceeded, a moodle_exception with code 'difftoolarge' will be thrown.
* @return int The diff in number of chars.
*/
static public function compute_filediff($filedata1, $filedata2, $timelimit=0) {
if (strlen($filedata1) == 0 || strlen($filedata2) == 0) {
return strlen($filedata1) + strlen($filedata2);
}
$starttime = microtime(true);
$lines1 = explode("\n", $filedata1);
$lines2 = explode("\n", $filedata2);
$n1 = count($lines1);
$n2 = count($lines2);
$queue = array_fill(0, $n1 + $n2 + 1, array());
$prev = array_fill(0, $n1 + 1, array());
$z = new stdClass();
$z->i1 = 0;
$z->i2 = 0;
$z->d = 0;
$z->prev = 0;
$queue[0][] = $z;
$priority = 0;
// A-star search of shortest diff.
while (true) {
if ($timelimit > 0 && microtime(true) - $starttime > $timelimit / 1000) {
// Time out.
throw new moodle_exception('difftoolarge');
}
while (count($queue[$priority]) == 0) {
$priority++;
}
$z = array_pop($queue[$priority]);
$i1 = $z->i1;
$i2 = $z->i2;
if (isset($prev[$i1][$i2])) {
continue;
} else {
$prev[$i1][$i2] = $z->prev;
}
// Identical substring: 0 distance.
while ($i1 < $n1 && $i2 < $n2 && $lines1[$i1] == $lines2[$i2]) {
$i1++;
$i2++;
$prev[$i1][$i2] = 0;
}
if ($i1 == $n1 && $i2 == $n2) {
// Found shortest diff amongst lines.
// Backtrack to compute total diff as a sum of lines diff.
$totaldiff = 0;
while ($i1 > 0 || $i2 > 0) {
if (!isset($prev[$i1][$i2])) {
$i1--;
$i2--;
} else {
switch ($prev[$i1][$i2]) {
case 1 :
$totaldiff += strlen( $lines1[$i1 - 1] ) + 1;
$i1--;
break;
case 2 :
$totaldiff += strlen( $lines2[$i2 - 1] ) + 1;
$i2--;
break;
case 3 :
$totaldiff += self::compute_linediff( $lines1[$i1 - 1], $lines2[$i2 - 1] );
case 0 :
$i1--;
$i2--;
break;
}
}
}
return $totaldiff;
}
// Line addition in file1.
if ($i1 < $n1 && !isset($prev[$i1 + 1][$i2])) {
$z1 = new stdClass();
$z1->i1 = $i1 + 1;
$z1->i2 = $i2;
$z1->d = $z->d + 1;
$z1->prev = 1;
$queue[$z1->d + abs(($n1 - $z1->i1) - ($n2 - $z1->i2))][] = $z1;
}
// Line addition in file2.
if ($i2 < $n2 && !isset($prev[$i1][$i2 + 1])) {
$z2 = new stdClass();
$z2->i1 = $i1;
$z2->i2 = $i2 + 1;
$z2->d = $z->d + 1;
$z2->prev = 2;
$queue[$z2->d + abs(($n1 - $z2->i1) - ($n2 - $z2->i2))][] = $z2;
}
// Line change.
if ($i1 < $n1 && $i2 < $n2 && !isset($prev[$i1 + 1][$i2 + 1])) {
$z3 = new stdClass();
$z3->i1 = $i1 + 1;
$z3->i2 = $i2 + 1;
$z3->d = $z->d + 1;
$z3->prev = 3;
$queue[$z3->d + abs(($n1 - $z3->i1) - ($n2 - $z3->i2))][] = $z3;
}
}
}
}