Pascal Diff, Version 0.3 (alt)

Hinweis: dies ist nicht die neueste Version!

Zurück zur Übersicht

Datei: diff.pas

{
  Diff.pas

  Copyright (C) 2005 Volker Siebert, Germany
  All rights reserved.

DESCRIPTION

  This unit for Delphi 5, 6, 7, and 2005 contains code to compare to text
  documents stored in TString objects line by line in the way the classical
  unix diff program works.

  Sample usage:

    Diff := TTextComparer.Create;
    try
      Diff.Compare(Strings1, Strings2);
      Label1.Caption := Format('There are %d differences', [Diff.Count]);
      // Process Diff.Diff[x].(Left|Right).(Start.Stop)
    finally
      Diff.Free;
    end;

  TTextComparer uses a hash on all lines to be able to compare them fast.
  If you provide an own compare function (i.e. not case sensitive), then
  you also have to provide an own hash function.

  The only thing the hash function must ensure is that if two strings match
  by your custom compare function, i.e. "MyOwnCompareFunction(s1, s2) = true",
  then both strings must have the same hash using your custom hash function,
  i.e., "MyOwnHashFunction(s1) = MyOwnHashFunction(s2)".

  Note: it will also work if you just a provide 0 as every hash value, but
  it will slow down the algorithm.

LICENSE

  The contents of this file are subject to the Mozilla Public License
  Version 1.1 (the "License"); you may not use this file except in
  compliance with the License. You may obtain a copy of the License at
  http://www.mozilla.org/MPL/

  Software distributed under the License is distributed on an "AS IS"
  basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
  License for the specific language governing rights and limitations
  under the License.

CHANGELOG

  2005-08-01: Version 0.3
    - Modified CountEqualLines to perform a pre-scan on the hash codes only
      (significant speed-up)
    - Changed info structure from object to record
    - Modified heuristic handling

  2005-07-29: Version 0.2
    - Added TStandardTextComparer

  2005-07-29: Version 0.1
    - Initial version
}

unit Diff;

interface

uses
  SysUtils, Classes;

type
  TDiffHashValue = Cardinal;

  TRangeOfLines = record
    Start: integer;  // including
    Stop: integer;   // excluding
  end;

  PDifference = ^TDifference;
  TDifference = record
    Left: TRangeOfLines;   // left file (first one)
    Right: TRangeOfLines;  // right file
  end;

  PDifferenceArray = ^TDifferenceArray;
  TDifferenceArray = array [0 .. MaxInt div 32] of TDifference;

  ETextComparerError = class(Exception);

  TTextComparerHash = function(const s: string): TDiffHashValue of object; register;
  TTextComparerCompare = function(const s1, s2: string): boolean of object; register;

  TCustomTextComparer = class
  private
    FDiffs: PDifferenceArray;
    FCount: integer;
    FAlloc: integer;
    FHeuristic: integer;
    FOnHash: TTextComparerHash;
    FOnCompare: TTextComparerCompare;
    function GetDiff(Index: integer): TDifference;
    procedure SetSize(NewCount: integer);
  protected
    function DefaultHash(const s: string): TDiffHashValue; register;
    function DefaultCompare(const s1, s2: string): boolean; register;
    property OnHash: TTextComparerHash read FOnHash write FOnHash;
    property OnCompare: TTextComparerCompare read FOnCompare write FOnCompare;
  public
    constructor Create;
    destructor Destroy; override;
    procedure Clear;
    procedure Compare(List1, List2: TStrings);
    property Count: integer read FCount;
    property Diff[Index: integer]: TDifference read GetDiff; default;
    property Heuristic: integer read FHeuristic write FHeuristic;
  end;

  TTextComparer = class(TCustomTextComparer)
  public
    property OnHash;
    property OnCompare;
  end;

  TStandardTextComparer = class(TCustomTextComparer)
  private
    FIgnoreCase: boolean;
    FIgnoreSpaces: boolean;
    FSpacesAsOne: boolean;
  protected
    function Hash_ynn(const s: string): TDiffHashValue; register;
    function Hash_nyn(const s: string): TDiffHashValue; register;
    function Hash_yyn(const s: string): TDiffHashValue; register;
    function Hash_nny(const s: string): TDiffHashValue; register;
    function Hash_yny(const s: string): TDiffHashValue; register;
    function Compare_ynn(const s1, s2: string): boolean; register;
    function Compare_nyn(const s1, s2: string): boolean; register;
    function Compare_yyn(const s1, s2: string): boolean; register;
    function Compare_nny(const s1, s2: string): boolean; register;
    function Compare_yny(const s1, s2: string): boolean; register;
  public
    procedure Compare(List1, List2: TStrings);
    property IgnoreCase: boolean read FIgnoreCase write FIgnoreCase;
    property IgnoreSpaces: boolean read FIgnoreSpaces write FIgnoreSpaces;
    property SpacesAsOne: boolean read FSpacesAsOne write FSpacesAsOne;
  end;

implementation

constructor TCustomTextComparer.Create;
begin
  inherited;
  FDiffs := nil;
  FCount := 0;
  FAlloc := 0;
  FHeuristic := 50;
end;

destructor TCustomTextComparer.Destroy;
begin
  Clear;
  inherited;
end;

procedure TCustomTextComparer.SetSize(NewCount: integer);
begin
  FAlloc := NewCount;
  ReAllocMem(FDiffs, FAlloc * SizeOf(TDifference));
  if FCount > FAlloc then
    FCount := FAlloc;
end;

procedure TCustomTextComparer.Clear;
begin
  SetSize(0);
end;

function TCustomTextComparer.GetDiff(Index: integer): TDifference;
begin
  if (Index < 0) or (Index >= FCount) then
    raise ETextComparerError.Create('Invalid item index');

  Result := FDiffs^[Index];
end;

function TCustomTextComparer.DefaultHash(const s: string): TDiffHashValue;
var
  k, len: integer;
begin
  Result := 0;
  len := Length(s);
  for k := 1 to len do
    Result := 19 * Result + Ord(s[k]);
end;

function TCustomTextComparer.DefaultCompare(const s1, s2: string): boolean;
begin
  Result := s1 = s2;
end;

{----------------------------------------------------------------------}

procedure TCustomTextComparer.Compare(List1, List2: TStrings);
type
  TFileInfo = record
    Strings: TStrings;
    Count: integer;
    LineHash: array of TDiffHashValue;    // [0 ..     Count - 1]
    HashLine: array of integer;           // [0 .. 2 * Count - 1]
    HashColl: array of integer;           // [0 .. 2 * Count - 1]
  end;

var
  HashFunc: TTextComparerHash;
  CompareFunc: TTextComparerCompare;
  Info1, Info2: TFileInfo;

  procedure InitFileInfo(var Info: TFileInfo; AStrings: TStrings);
  var
    idx, back, coll: Integer;
  begin
    with Info do
    begin
      Strings := AStrings;
      Count := Strings.Count;
      SetLength(LineHash, Count);
      SetLength(HashLine, 2 * Count);
      SetLength(HashColl, 2 * Count);

      if Count < 1 then
        exit;

      // The upper half of HashLine/HashColl does not need to be initialized.
      for idx := 0 to Count - 1 do
      begin
        LineHash[idx] := HashFunc(Strings[idx]);
        HashLine[idx] := -1;
        HashColl[idx] := -1;
      end;

      coll := Count - 1;
      for idx := 0 to Count - 1 do
      begin
        back := integer(Cardinal(LineHash[idx]) mod Cardinal(Count));
        if HashLine[back] < 0 then
          HashLine[back] := idx
        else
        begin
          inc(coll);
          HashLine[coll] := HashLine[back];
          HashColl[coll] := HashColl[back];
          HashLine[back] := idx;
          HashColl[back] := coll;
        end;
      end;
    end;
  end;

  procedure AddDiff(l1a, l1e, l2a, l2e: integer);
  begin
    if (l1a >= l1e) and (l2a >= l2e) then
      exit;

    if FCount >= FAlloc then
      SetSize(FAlloc + 32);

    with FDiffs^[FCount] do
    begin
      Left.Start := l1a;
      Left.Stop := l1e;
      Right.Start := l2a;
      Right.Stop := l2e;
    end;

    inc(FCount);
  end;

  function DistanceLess(l1a, l1e, l2a, l2e: integer;
                        bp1, bp2, np1, np2: integer): boolean;
  var
    t1, b1, t2, b2: integer;
  begin
    t1 := abs((bp1 - l1a) - (bp2 - l2a));
    b1 := abs((l1e - bp1) - (l2e - bp2));
    t2 := abs((np1 - l2a) - (np2 - l2a));
    b2 := abs((l2e - np1) - (l2e - np2));
    Result := (t2 + b2) < (t1 + b1);
  end;

  function CountEqualLines(l1p, l1e, l2p, l2e, bsz: integer): integer;
  var
    max: integer;
  begin
    // Prescan: compare the hashcodes
    max := 0;
    while (l1p + max < l1e) and
          (l2p + max < l2e) and
          (Info1.LineHash[l1p + max] = Info2.LineHash[l2p + max]) do
      inc(max);

    // Better match possible?
    Result := 0;
    if max < bsz then
      exit;

    // Final scan: really compare the strings
    while (Result < max) and
          CompareFunc(Info1.Strings[l1p], Info2.Strings[l2p]) do
    begin
      inc(l1p);
      inc(l2p);
      inc(Result);
    end;
  end;

  procedure DiffRange(l1a, l1e, l2a, l2e: integer);
  var
    bp1, bp2, bsz, scan, idx, pos, cnt: integer;
  begin
    while true do
    begin
      if (l1a >= l1e) or (l2a >= l2e) then
        break;

      bp1 := -1;
      bp2 := -1;
      bsz := 0;

      if l1e - l1a < l2e - l2a then
      begin
        // Block 1 is smaller
        scan := l1a;
        while scan < l1e - bsz do
        begin
          idx := integer(Cardinal(Info1.LineHash[scan]) mod Cardinal(Info2.Count));
          repeat
            pos := Info2.HashLine[idx];

            // Trick: the lines have been added from 0 up to Count-1. So, for
            // colliding hashes, the lines are stored in REVERSE order. Thus,
            // when we find a line BEFORE l2a, we're done with this block.
            // This also catches the -1 at the end of the queue.
            if pos < l2a then
              break;

            if pos + bsz <= l2e then
            begin
              cnt := CountEqualLines(scan, l1e, pos, l2e, bsz);
              if (cnt > bsz) or
                 ((cnt = bsz) and DistanceLess(l1a, l1e, l2a, l2e, bp1, bp2, scan, pos)) then
              begin
                bsz := cnt;
                bp1 := scan;
                bp2 := pos;
              end;
            end;

            idx := Info2.HashColl[idx];
          until idx < 0;

          if (FHeuristic > 0) and (bsz >= FHeuristic) then
            break;

          inc(scan);
        end;
      end
      else
      begin
        // Block 2 is smaller
        scan := l2a;
        while scan < l2e - bsz do
        begin
          idx := integer(Cardinal(Info2.LineHash[scan]) mod Cardinal(Info1.Count));
          repeat
            pos := Info1.HashLine[idx];

            // Trick: the lines have been added from 0 up to Count-1. So, for
            // colliding hashes, the lines are stored in REVERSE order. Thus,
            // when we find a line BEFORE l1a, we're done with this block.
            // This also catches the -1 at the end of the queue.
            if pos < l1a then
              break;

            if pos + bsz <= l1e then
            begin
              cnt := CountEqualLines(pos, l1e, scan, l2e, bsz);
              if (cnt > bsz) or
                 ((cnt = bsz) and DistanceLess(l1a, l1e, l2a, l2e, bp1, bp2, pos, scan)) then
              begin
                bsz := cnt;
                bp1 := pos;
                bp2 := scan;
              end;
            end;

            idx := Info1.HashColl[idx];
          until idx < 0;

          if (FHeuristic > 0) and (bsz >= FHeuristic) then
            break;

          inc(scan);
        end;
      end;

      if bsz = 0 then
        break;

      // This way the diffs are always sorted correctly
      DiffRange(l1a, bp1, l2a, bp2);
      l1a := bp1 + bsz;
      l2a := bp2 + bsz;

      // This way the recursion would be optimized, but we had to
      // sort the diffs after all passes
      {
      if (bp1 - l1a) + (bp2 - l2a) < (l1e - bp1 - bsz) + (l2e - bp2 - bsz) then
      begin
        DiffRange(l1a, bp1, l2a, bp2);
        l1a := bp1 + bsz;
        l2a := bp2 + bsz;
      end
      else
      begin
        DiffRange(bp1 + bsz, l1e, bp2 + bsz, l2e);
        l1e := bp1;
        l2e := bp2;
      end;
      }
    end;

    AddDiff(l1a, l1e, l2a, l2e);
  end;

begin
  Clear;

  if Assigned(FOnHash) and Assigned(FOnCompare) then
  begin
    HashFunc := FOnHash;
    CompareFunc := FOnCompare;
  end
  else
  begin
    HashFunc := DefaultHash;
    CompareFunc := DefaultCompare;
  end;

  // Delphi does Initialize/Finalize, no need to protect with try-finally
  InitFileInfo(Info1, List1);
  InitFileInfo(Info2, List2);
  DiffRange(0, Info1.Count, 0, Info2.Count);
end;

{----------------------------------------------------------------------}

function RemoveSpaces(const s: string): string;
var
  len, p: integer;
  pc: PChar;
begin
  len := Length(s);
  SetLength(Result, len);

  pc := @s[1];
  p := 0;
  while len > 0 do
  begin
    if not (pc^ in [#0, #9, #10, #13, #32]) then
    begin
      inc(p);
      Result[p] := pc^;
    end;

    inc(pc);
    dec(len);
  end;

  SetLength(Result, p);
end;

function ReduceSpaces(const s: string): string;
var
  len, p, sp: integer;
  pc: PChar;
begin
  len := Length(s);
  SetLength(Result, len);

  pc := @s[1];
  p := 0;
  sp := 0;
  while len > 0 do
  begin
    if pc^ in [#0, #9, #10, #13, #32] then
      inc(sp, p)
    else
    begin
      if sp > 0 then
      begin
        sp := 0;
        inc(p);
        Result[p] := ' ';
      end;

      inc(p);
      Result[p] := pc^;
    end;

    inc(pc);
    dec(len);
  end;

  SetLength(Result, p);
end;

{----------------------------------------------------------------------}

function TStandardTextComparer.Hash_ynn(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(Lowercase(s));
end;

function TStandardTextComparer.Hash_nyn(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(RemoveSpaces(s));
end;

function TStandardTextComparer.Hash_yyn(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(Lowercase(RemoveSpaces(s)));
end;

function TStandardTextComparer.Hash_nny(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(ReduceSpaces(s));
end;

function TStandardTextComparer.Hash_yny(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(Lowercase(ReduceSpaces(s)));
end;

function TStandardTextComparer.Compare_ynn(const s1, s2: string): boolean;
begin
  Result := Lowercase(s1) = Lowercase(s2);
end;

function TStandardTextComparer.Compare_nyn(const s1, s2: string): boolean;
begin
  Result := RemoveSpaces(s1) = RemoveSpaces(s2);
end;

function TStandardTextComparer.Compare_yyn(const s1, s2: string): boolean;
begin
  Result := Lowercase(RemoveSpaces(s1)) = Lowercase(RemoveSpaces(s2));
end;

function TStandardTextComparer.Compare_nny(const s1, s2: string): boolean;
begin
  Result := ReduceSpaces(s1) = ReduceSpaces(s2);
end;

function TStandardTextComparer.Compare_yny(const s1, s2: string): boolean;
begin
  Result := Lowercase(ReduceSpaces(s1)) = Lowercase(ReduceSpaces(s2));
end;

procedure TStandardTextComparer.Compare(List1, List2: TStrings);
var
  HashFunc: TTextComparerHash;
  CompareFunc: TTextComparerCompare;
begin
  HashFunc := FOnHash;
  CompareFunc := FOnCompare;

  try
    case 4 * Ord(FIgnoreCase) + 2 * Ord(FIgnoreSpaces) + Ord(FSpacesAsOne) of
      0 + 0 + 0: begin
        FOnHash := nil;
        FOnCompare := nil;
      end;
      0 + 0 + 1: begin
        FOnHash := Hash_nny;
        FOnCompare := Compare_nny;
      end;
      0 + 2 + 0,
      0 + 2 + 1: begin
        FOnHash := Hash_nyn;
        FOnCompare := Compare_nyn;
      end;
      4 + 0 + 0: begin
        FOnHash := Hash_ynn;
        FOnCompare := Compare_ynn;
      end;
      4 + 0 + 1: begin
        FOnHash := Hash_yny;
        FOnCompare := Compare_yny;
      end;
      4 + 2 + 0,
      4 + 2 + 1: begin
        FOnHash := Hash_yyn;
        FOnCompare := Compare_yyn;
      end;
    end;

    inherited Compare(List1, List2);
  finally
    FOnHash := HashFunc;
    FOnCompare := CompareFunc;
  end;
end;

end.
Flocke's Garage
Valid HTML 4.01 Transitional Valid CSS!
(C) 2005-2018 Volker Siebert.
Creative Commons-LizenzvertragDer gesamte Inhalt dieser Webseite steht unter einer Creative Commons-Lizenz (sofern nicht anders angegeben).