Skip to content
Snippets Groups Projects
diff.class.php 20.77 KiB
<?php
// This file is part of VPL for Moodle - http://vpl.dis.ulpgc.es/
//
// VPL for Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// VPL for Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with VPL for Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Class to show two files diff
 *
 * @package mod_vpl
 * @copyright 2012 Juan Carlos Rodríguez-del-Pino
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 * @author Juan Carlos Rodríguez-del-Pino <jcrodriguez@dis.ulpgc.es>
 */

defined('MOODLE_INTERNAL') || die();

require_once(dirname(__FILE__).'/../locallib.php');
require_once(dirname(__FILE__).'/../vpl.class.php');
require_once(dirname(__FILE__).'/../vpl_submission.class.php');
require_once(dirname(__FILE__).'/similarity_factory.class.php');
require_once(dirname(__FILE__).'/similarity_sources.class.php');

class vpl_diff {
    /**
     * Remove chars and digits
     *
     * @param $line string
     *            to process
     * @return string without chars and digits
     */
    static public function removealphanum($line) {
        $ret = '';
        $l = strlen( $line );
        // Parse line to remove alphanum chars.
        for ($i = 0; $i < $l; $i ++) {
            $c = $line [$i];
            if (! ctype_alnum( $c ) && $c != ' ') {
                $ret .= $c;
            }
        }
        return $ret;
    }

    /**
     * Calculate the similarity of two lines
     *
     * @param
     *            $line1
     * @param
     *            $line2
     * @return int (3 => trimmed equal, 2 =>removealphanum , 1 => start of line , 0 => not equal)
     */
    static public function diffline($line1, $line2) {
        // TODO Refactor.
        // This is a bad solution that must be rebuild to consider diferent languages.
        // Compare trimed text.
        $line1 = trim( $line1 );
        $line2 = trim( $line2 );
        if ($line1 == $line2) {
            if (strlen( $line1 ) > 0) {
                return 3;
            } else {
                return 1;
            }
        }
        // Compare filtered text (removing alphanum).
        $ran1 = self::removealphanum( $line1 );
        $limit = strlen( $ran1 );
        if ($limit > 0) {
            if ($limit > 3) {
                $limite = 3;
            }
            if (strncmp( $ran1, self::removealphanum( $line2 ), $limit ) == 0) {
                return 2;
            }
        }
        // Compare start of line.
        $l = 4;
        if ($l > strlen( $line1 )) {
            $l = strlen( $line1 );
        }
        if ($l > strlen( $line2 )) {
            $l = strlen( $line2 );
        }
        for ($i = 0; $i < $l; ++ $i) {
            if ($line1 [$i] != $line2 [$i]) {
                break;
            }
        }
        return $i > 0 ? 1 : 0;
    }
    static public function newlineinfo($type, $ln1, $ln2 = 0) {
        $ret = new StdClass();
        $ret->type = $type;
        $ret->ln1 = $ln1;
        $ret->ln2 = $ln2;
        return $ret;
    }

    /**
     * Initialize used matrix
     *
     * @param array $matrix  of arrays to initialize
     * @param array $prev of arrays to initialize
     * @param $nl1 number of rows
     * @param $nl2 number of columns
     * @return void
     */
    static public function initauxiliarmatrices(&$matrix, &$prev, $nl1, $nl2) {
        // Set the matrix[0..nl1+1][0..nl2+1] to 0.
        $row = array_pad( array (), $nl2 + 1, 0 );
        $matrix = array_pad( array (), $nl1 + 1, $row );
        // Set the prev matrix [0..nl1+1][0..nl2+1] to 0.
        $prev = $matrix;

        // Update first column.
        for ($i = 0; $i <= $nl1; $i ++) {
            $matriz [$i] [0] = 0;
            $prev [$i] [0] = - 1;
        }
        // Update first row.
        for ($j = 1; $j <= $nl2; $j ++) {
            $matriz [0] [$j] = 0;
            $prev [0] [$j] = 1;
        }
    }

    static public function similine($line1, $line2, $pattern) {
        return preg_replace($pattern, '', $line1) == preg_replace($pattern, '', $line2);
    }

    /**
     * Calculate diff for two array of lines
     *
     * @param $lines1 array
     *            of string
     * @param $lines2 array
     *            of string
     * @param $detaileddiff bool
     *            whether different levels of similarity should be distinguished
     * @return array of objects with info to show the two array of lines
     */
    static public function calculatediff($lines1, $lines2, $detaileddiff = true) {
        $ret = array ();
        $nl1 = count( $lines1 );
        $nl2 = count( $lines2 );
        if ($nl1 == 0 && $nl2 == 0) {
            return false;
        }
        if ($nl1 == 0) { // There is no first file.
            foreach ($lines2 as $pos => $line) {
                $ret [] = self::newlineinfo( '>', 0, $pos + 1 );
            }
            return $ret;
        }
        if ($nl2 == 0) { // There is no second file.
            foreach ($lines1 as $pos => $line) {
                $ret [] = self::newlineinfo( '<', $pos + 1 );
            }
            return $ret;
        }
        self::initauxiliarmatrices( $matrix, $prev, $nl1, $nl2 );

        // Matrix processing.
        for ($i = 1; $i <= $nl1; $i ++) {
            $line = $lines1 [$i - 1];
            for ($j = 1; $j <= $nl2; $j ++) {
                if ($matrix [$i] [$j - 1] > $matrix [$i - 1] [$j]) {
                    $max = $matrix [$i] [$j - 1];
                    $best = 1;
                } else {
                    $max = $matrix [$i - 1] [$j];
                    $best = - 1;
                }
                if ($detaileddiff) {
                    $prize = self::diffline( $line, $lines2 [$j - 1] );
                } else {
                    $prize = $line === $lines2 [$j - 1] ? 1 : 0;
                }
                if ($matrix [$i - 1] [$j - 1] + $prize >= $max) {
                    $max = $matrix [$i - 1] [$j - 1] + $prize;
                    $best = 0;
                }
                $matrix [$i] [$j] = $max;
                $prev [$i] [$j] = $best;
            }
        }

        // Calculate show info.
        $limit = $nl1 + $nl2;
        $pairs = array ();
        $pi = $nl1;
        $pj = $nl2;
        while ( (! ($pi == 0 && $pj == 0)) && $limit > 0 ) {
            $pair = new stdClass();
            $pair->i = $pi;
            $pair->j = $pj;
            $pairs [] = $pair;
            $p = $prev [$pi] [$pj];
            if ($p == 0) {
                $pi --;
                $pj --;
            } else if ($p == - 1) {
                $pi --;
            } else if ($p == 1) {
                $pj --;
            } else {
                debbuging('error');
            }
            $limit --;
        }

        krsort( $pairs );
        $prevpair = new stdClass();
        $prevpair->i = 0;
        $prevpair->j = 0;
        foreach ($pairs as $pair) {
            if ($pair->i == $prevpair->i + 1 && $pair->j == $prevpair->j + 1) { // Regular advance.
                $l1 = $lines1 [$pair->i - 1];
                $l2 = $lines2 [$pair->j - 1];
                if ($l1 == $l2) { // Equals.
                    $ret [] = self::newlineinfo( '=', $pair->i, $pair->j );
                } else if ( self::similine($l1, $l2, '/\s/')) {
                    $ret [] = self::newlineinfo( '1', $pair->i, $pair->j );
                } else if ( self::similine($l1, $l2, '/(\s|[0-9]|[a-z])/i')) {
                    $ret [] = self::newlineinfo( '2', $pair->i, $pair->j );
                } else {
                    $ret [] = self::newlineinfo( '#', $pair->i, $pair->j );
                }
            } else if ($pair->i == $prevpair->i + 1) { // Removed next line.
                $ret [] = self::newlineinfo( '<', $pair->i, false );
            } else if ($pair->j == $prevpair->j + 1) { // Added one line.
                $ret [] = self::newlineinfo( '>', false, $pair->j );
            } else {
                debugging( "Internal error " . s( $pair ) . " " . s( $prevpair) );
            }
            $prevpair = $pair;
        }
        return $ret;
    }
    static public function show($filename1, $data1, $htmlheader1, $filename2, $data2, $htmlheader2) {
        // Get file lines.
        $nl = vpl_detect_newline( $data1 );
        $lines1 = explode( $nl, $data1 );
        $nl = vpl_detect_newline( $data2 );
        $lines2 = explode( $nl, $data2 );
        // Get dif as an array of info.
        $diff = self::calculatediff( $lines1, $lines2 );
        if ($diff === false) {
            return;
        }
        $separator = array (
                '<' => ' <<< ',
                '>' => ' >>> ',
                '=' => ' === ',
                '1' => ' ==# ',
                '2' => ' =## ',
                '#' => ' ### '
        );
        $emptyline = "\n";
        $data1 = '';
        $data2 = '';
        $datal1 = '';
        $datal2 = '';
        $diffl = '';
        $lines1 [- 1] = '';
        $lines2 [- 1] = '';
        foreach ($diff as $line) {
            $diffl .= $separator [$line->type] . "\n";
            if ($line->ln1) {
                $datal1 .= sprintf("%4d\n", $line->ln1);
                $data1 .= $lines1 [$line->ln1 - 1] . "\n";
            } else {
                if ( $data1 == '' ) {
                    $data1 .= $emptyline;
                    $datal1 .= $emptyline;
                }
                $data1 .= $emptyline;
                $datal1 .= $emptyline;
            }
            if ($line->ln2) {
                $datal2 .= sprintf("%4d\n", $line->ln2);
                $data2 .= $lines2 [$line->ln2 - 1] . "\n";
            } else {
                if ( $data2 == '' ) {
                    $data2 .= $emptyline;
                    $datal2 .= $emptyline;
                }
                $data2 .= $emptyline;
                $datal2 .= $emptyline;
            }
        }
        echo '<div style="width: 100%;min-width: 950px; overflow: auto">';
        // Header.
        echo '<div style="float:left; width: 445px">';
        echo $htmlheader1;
        echo '</div>';
        echo '<div style="float:left; width: 445px">';
        echo $htmlheader2;
        echo '</div>';
        echo '<div style="clear:both;"></div>';
        // Files.
        $pre = '<pre clas="vpl_g">';
        echo '<div style="float:left; text-align: right; width: 3em">';
        $shower = vpl_sh_factory::get_sh( 'a.txt' );
        $shower->print_file( 'a.txt', $datal1, false, count($diff) + 1, false );
        echo '</div>';
        echo '<div style="float:left; width: 390px; overflow:auto">';
        $shower = vpl_sh_factory::get_sh( $filename1 );
        $shower->print_file( $filename1, $data1, false, count($diff) + 1, false );
        echo '</div>';
        echo '<div style="float:left; width: 3em"">';
        $shower = vpl_sh_factory::get_sh( 'b.txt' );
        $shower->print_file( 'b.txt', $diffl, false, count($diff) + 1, false );
        echo '</div>';
        echo '<div style="float:left; text-align: right; width: 3em"">';
        $shower = vpl_sh_factory::get_sh( 'b.txt' );
        $shower->print_file( 'b.txt', $datal2, false, count($diff) + 1, false );
        echo '</div>';
        echo '<div style="float:left; width: 390px; overflow:auto">';
        $shower = vpl_sh_factory::get_sh( $filename2 );
        $shower->print_file( $filename2, $data2, false, count($diff) + 1, false );
        echo '</div>';
        echo '</div>';
        echo '<div style="clear:both;"></div>';
        vpl_sh_factory::syntaxhighlight();
    }
    static public function vpl_get_similfile($f, &$htmlheader, &$filename, &$data) {
        global $DB;
        $htmlheader = '';
        $filename = '';
        $data = '';
        $type = required_param( 'type' . $f, PARAM_INT );
        if ($type == 1) {
            $subid = required_param( 'subid' . $f, PARAM_INT );
            $filename = required_param( 'filename' . $f, PARAM_TEXT );
            $subinstance = $DB->get_record( 'vpl_submissions', array (
                    'id' => $subid
            ) );
            if ($subinstance !== false) {
                $vpl = new mod_vpl( false, $subinstance->vpl );
                $vpl->require_capability( VPL_SIMILARITY_CAPABILITY );
                $submission = new mod_vpl_submission( $vpl, $subinstance );
                $user = $DB->get_record( 'user', array (
                        'id' => $subinstance->userid
                ) );
                if ($user) {
                    $link = vpl_mod_href( '/forms/submissionview.php', 'id', $vpl->get_course_module()->id
                                          , 'userid', $subinstance->userid );
                    $htmlheader .= '<a href="' . $link . '">';
                }
                $htmlheader .= s( $filename ) . ' ';
                if ($user) {
                    $htmlheader .= '</a>';
                    $htmlheader .= $vpl->user_fullname_picture( $user );
                }
                $fg = $submission->get_submitted_fgm();
                $data = $fg->getFileData( $filename );
                \mod_vpl\event\vpl_diff_viewed::log( $submission );
            }
        } else if ($type == 3) {
            global $CFG;
            $data = '';
            $vplid = required_param( 'vplid' . $f, PARAM_INT );
            $vpl = new mod_vpl( false, $vplid );
            $vpl->require_capability( VPL_SIMILARITY_CAPABILITY );
            $zipname = required_param( 'zipfile' . $f, PARAM_RAW );
            $filename = required_param( 'filename' . $f, PARAM_RAW );
            $htmlheader .= $filename . ' ' . optional_param( 'username' . $f, '', PARAM_TEXT );
            $ext = strtoupper( pathinfo( $zipname, PATHINFO_EXTENSION ) );
            if ($ext != 'ZIP') {
                print_error( 'nozipfile' );
            }
            $zip = new ZipArchive();
            $zipfilename = vpl_similarity_preprocess::get_zip_filepath( $vplid, $zipname );
            if ($zip->open( $zipfilename ) === true) {
                $data = $zip->getFromName( $filename );
                $zip->close();
            }
        } else {
            print_error( 'type error' );
        }
    }

    /**
     * Computes diff between two lines.
     * @param string $line1
     * @param string $line2
     * @return int The diff in number of chars.
     */
    static public function compute_linediff($line1, $line2) {
        $n1 = strlen($line1);
        $n2 = strlen($line2);
        if ($n1 == 0 || $n2 == 0) {
            return $n1 + $n2;
        }
        $queue = array_fill(0, $n1 + $n2 + 1, array());
        $done = array_fill(0, $n1 + 1, array());
        $z = new stdClass();
        $z->i1 = 0;
        $z->i2 = 0;
        $z->d = 0;
        $queue[0][] = $z;
        $priority = 0;

        // A-star search of shortest diff.
        while (true) {
            while (count($queue[$priority]) == 0) {
                $priority++;
            }
            $z = array_pop($queue[$priority]);
            $i1 = $z->i1;
            $i2 = $z->i2;
            if (isset($done[$i1][$i2])) {
                continue;
            } else {
                $done[$i1][$i2] = true;
            }

            // Identical substring: 0 distance.
            while ($i1 < $n1 && $i2 < $n2 && $line1{$i1} == $line2{$i2}) {
                $i1++;
                $i2++;
                $done[$i1][$i2] = true;
            }

            if ($i1 == $n1 && $i2 == $n2) {
                // Found shortest diff.
                return $z->d;
            }

            // Character addition in line1.
            if ($i1 < $n1 && !isset($done[$i1 + 1][$i2])) {
                $z1 = new stdClass();
                $z1->i1 = $i1 + 1;
                $z1->i2 = $i2;
                $z1->d = $z->d + 1;
                $queue[$z1->d + abs(($n1 - $z1->i1) - ($n2 - $z1->i2))][] = $z1;
            }

            // Character addition in line2.
            if ($i2 < $n2 && !isset($done[$i1][$i2 + 1])) {
                $z2 = new stdClass();
                $z2->i1 = $i1;
                $z2->i2 = $i2 + 1;
                $z2->d = $z->d + 1;
                $queue[$z2->d + abs(($n1 - $z2->i1) - ($n2 - $z2->i2))][] = $z2;
            }

            // Character change.
            if ($i1 < $n1 && $i2 < $n2 && !isset($done[$i1 + 1][$i2 + 1])) {
                $z3 = new stdClass();
                $z3->i1 = $i1 + 1;
                $z3->i2 = $i2 + 1;
                $z3->d = $z->d + 1;
                $queue[$z3->d + abs(($n1 - $z3->i1) - ($n2 - $z3->i2))][] = $z3;
            }
        }
    }

    /**
     * Computes diff between two files.
     * @param string $filedata1
     * @param string $filedata2
     * @param int $timelimit The maximum amount of time to spend on this computation (in milliseconds).
     *  If provided and the time limit is exceeded, a moodle_exception with code 'difftoolarge' will be thrown.
     * @return int The diff in number of chars.
     */
    static public function compute_filediff($filedata1, $filedata2, $timelimit=0) {
        if (strlen($filedata1) == 0 || strlen($filedata2) == 0) {
            return strlen($filedata1) + strlen($filedata2);
        }
        $starttime = microtime(true);
        $lines1 = explode("\n", $filedata1);
        $lines2 = explode("\n", $filedata2);
        $n1 = count($lines1);
        $n2 = count($lines2);
        $queue = array_fill(0, $n1 + $n2 + 1, array());
        $prev = array_fill(0, $n1 + 1, array());
        $z = new stdClass();
        $z->i1 = 0;
        $z->i2 = 0;
        $z->d = 0;
        $z->prev = 0;
        $queue[0][] = $z;
        $priority = 0;

        // A-star search of shortest diff.
        while (true) {
            if ($timelimit > 0 && microtime(true) - $starttime > $timelimit / 1000) {
                // Time out.
                throw new moodle_exception('difftoolarge');
            }
            while (count($queue[$priority]) == 0) {
                $priority++;
            }
            $z = array_pop($queue[$priority]);
            $i1 = $z->i1;
            $i2 = $z->i2;
            if (isset($prev[$i1][$i2])) {
                continue;
            } else {
                $prev[$i1][$i2] = $z->prev;
            }

            // Identical substring: 0 distance.
            while ($i1 < $n1 && $i2 < $n2 && $lines1[$i1] == $lines2[$i2]) {
                $i1++;
                $i2++;
                $prev[$i1][$i2] = 0;
            }

            if ($i1 == $n1 && $i2 == $n2) {
                // Found shortest diff amongst lines.
                // Backtrack to compute total diff as a sum of lines diff.
                $totaldiff = 0;
                while ($i1 > 0 || $i2 > 0) {
                    if (!isset($prev[$i1][$i2])) {
                        $i1--;
                        $i2--;
                    } else {
                        switch ($prev[$i1][$i2]) {
                            case 1 :
                                $totaldiff += strlen( $lines1[$i1 - 1] ) + 1;
                                $i1--;
                                break;
                            case 2 :
                                $totaldiff += strlen( $lines2[$i2 - 1] ) + 1;
                                $i2--;
                                break;
                            case 3 :
                                $totaldiff += self::compute_linediff( $lines1[$i1 - 1], $lines2[$i2 - 1] );
                            case 0 :
                                $i1--;
                                $i2--;
                                break;
                        }
                    }
                }
                return $totaldiff;
            }

            // Line addition in file1.
            if ($i1 < $n1 && !isset($prev[$i1 + 1][$i2])) {
                $z1 = new stdClass();
                $z1->i1 = $i1 + 1;
                $z1->i2 = $i2;
                $z1->d = $z->d + 1;
                $z1->prev = 1;
                $queue[$z1->d + abs(($n1 - $z1->i1) - ($n2 - $z1->i2))][] = $z1;
            }

            // Line addition in file2.
            if ($i2 < $n2 && !isset($prev[$i1][$i2 + 1])) {
                $z2 = new stdClass();
                $z2->i1 = $i1;
                $z2->i2 = $i2 + 1;
                $z2->d = $z->d + 1;
                $z2->prev = 2;
                $queue[$z2->d + abs(($n1 - $z2->i1) - ($n2 - $z2->i2))][] = $z2;
            }

            // Line change.
            if ($i1 < $n1 && $i2 < $n2 && !isset($prev[$i1 + 1][$i2 + 1])) {
                $z3 = new stdClass();
                $z3->i1 = $i1 + 1;
                $z3->i2 = $i2 + 1;
                $z3->d = $z->d + 1;
                $z3->prev = 3;
                $queue[$z3->d + abs(($n1 - $z3->i1) - ($n2 - $z3->i2))][] = $z3;
            }
        }
    }
}