Pascal Diff, Version 0.2 (alt)

Hinweis: dies ist nicht die neueste Version!

Zurück zur Übersicht

Datei: diff.pas

{
  Diff.pas

  Copyright (C) 2005 Volker Siebert, Germany
  All rights reserved.

DESCRIPTION

  This unit for Delphi 5, 6, 7, and 2005 contains code to compare to text
  documents stored in TString objects line by line in the way the classical
  unix diff program works.

  Sample usage:

    Diff := TTextComparer.Create;
    try
      Diff.Compare(Strings1, Strings2);
      Label1.Caption := Format('There are %d differences', [Diff.Count]);
      // Process Diff.Diff[x].(Left|Right).(Start.Stop)
    finally
      Diff.Free;
    end;

  TTextComparer uses a hash on all lines to be able to compare them fast.
  If you provide an own compare function (i.e. not case sensitive), then
  you also have to provide an own hash function.

  The only thing the hash function must ensure is that if two strings match
  by your custom compare function, i.e. "MyOwnCompareFunction(s1, s2) = true",
  then both strings must have the same hash using your custom hash function,
  i.e., "MyOwnHashFunction(s1) = MyOwnHashFunction(s2)".

  Note: it will also work if you just a provide 0 as every hash value, but
  it will slow down the algorithm.

LICENSE

  The contents of this file are subject to the Mozilla Public License
  Version 1.1 (the "License"); you may not use this file except in
  compliance with the License. You may obtain a copy of the License at
  http://www.mozilla.org/MPL/

  Software distributed under the License is distributed on an "AS IS"
  basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
  License for the specific language governing rights and limitations
  under the License.

CHANGELOG

  2005-07-29: Version 0.2
    - Added TStandardTextComparer

  2005-07-29: Version 0.1
    - Initial version
}

unit Diff;

interface

uses
  SysUtils, Classes;

type
  TDiffHashValue = Cardinal;

  TRangeOfLines = record
    Start: integer;  // including
    Stop: integer;   // excluding
  end;

  PDifference = ^TDifference;
  TDifference = record
    Left: TRangeOfLines;   // left file (first one)
    Right: TRangeOfLines;  // right file
  end;

  PDifferenceArray = ^TDifferenceArray;
  TDifferenceArray = array [0 .. MaxInt div 32] of TDifference;

  ETextComparerError = class(Exception);

  TTextComparerHash = function(const s: string): TDiffHashValue of object; register;
  TTextComparerCompare = function(const s1, s2: string): boolean of object; register;

  TCustomTextComparer = class
  private
    FDiffs: PDifferenceArray;
    FCount: integer;
    FAlloc: integer;
    FHeuristic: integer;
    FOnHash: TTextComparerHash;
    FOnCompare: TTextComparerCompare;
    function GetDiff(Index: integer): TDifference;
    procedure SetSize(NewCount: integer);
  protected
    function DefaultHash(const s: string): TDiffHashValue; register;
    function DefaultCompare(const s1, s2: string): boolean; register;
    property OnHash: TTextComparerHash read FOnHash write FOnHash;
    property OnCompare: TTextComparerCompare read FOnCompare write FOnCompare;
  public
    constructor Create;
    destructor Destroy; override;
    procedure Clear;
    procedure Compare(List1, List2: TStrings);
    property Count: integer read FCount;
    property Diff[Index: integer]: TDifference read GetDiff; default;
    property Heuristic: integer read FHeuristic write FHeuristic;
  end;

  TTextComparer = class(TCustomTextComparer)
  public
    property OnHash;
    property OnCompare;
  end;

  TStandardTextComparer = class(TCustomTextComparer)
  private
    FIgnoreCase: boolean;
    FIgnoreSpaces: boolean;
    FSpacesAsOne: boolean;
  protected
    function Hash_ynn(const s: string): TDiffHashValue; register;
    function Hash_nyn(const s: string): TDiffHashValue; register;
    function Hash_yyn(const s: string): TDiffHashValue; register;
    function Hash_nny(const s: string): TDiffHashValue; register;
    function Hash_yny(const s: string): TDiffHashValue; register;
    function Compare_ynn(const s1, s2: string): boolean; register;
    function Compare_nyn(const s1, s2: string): boolean; register;
    function Compare_yyn(const s1, s2: string): boolean; register;
    function Compare_nny(const s1, s2: string): boolean; register;
    function Compare_yny(const s1, s2: string): boolean; register;
  public
    procedure Compare(List1, List2: TStrings);
    property IgnoreCase: boolean read FIgnoreCase write FIgnoreCase;
    property IgnoreSpaces: boolean read FIgnoreSpaces write FIgnoreSpaces;
    property SpacesAsOne: boolean read FSpacesAsOne write FSpacesAsOne;
  end;

implementation

constructor TCustomTextComparer.Create;
begin
  inherited;
  FDiffs := nil;
  FCount := 0;
  FAlloc := 0;
  FHeuristic := 40;
end;

destructor TCustomTextComparer.Destroy;
begin
  Clear;
  inherited;
end;

procedure TCustomTextComparer.SetSize(NewCount: integer);
begin
  FAlloc := NewCount;
  ReAllocMem(FDiffs, FAlloc * SizeOf(TDifference));
  if FCount > FAlloc then
    FCount := FAlloc;
end;

procedure TCustomTextComparer.Clear;
begin
  SetSize(0);
end;

function TCustomTextComparer.GetDiff(Index: integer): TDifference;
begin
  if (Index < 0) or (Index >= FCount) then
    raise ETextComparerError.Create('Invalid item index');

  Result := FDiffs^[Index];
end;

function TCustomTextComparer.DefaultHash(const s: string): TDiffHashValue;
var
  k, len: integer;
begin
  Result := 0;
  len := Length(s);
  for k := 1 to len do
    Result := 19 * Result + Ord(s[k]);
end;

function TCustomTextComparer.DefaultCompare(const s1, s2: string): boolean;
begin
  Result := s1 = s2;
end;

{----------------------------------------------------------------------}

function HashToRange(Hash: TDiffHashValue; Max: integer): integer; inline;
begin
  Result := Integer(Hash) mod Max;
  if Result < 0 then
    inc(Result, Max);
end;

type
  TFileInfo = class
  public
    Strings: TStrings;
    Count: integer;
    LineHash: array of TDiffHashValue;    // [0 .. Count]
    HashLine: array of integer;           // [0 .. 2 * Count - 1]
    HashColl: array of integer;           // [0 .. 2 * Count - 1]

    procedure Init(AStrings: TStrings; HashFunc: TTextComparerHash);
    destructor Destroy; override;
  end;

procedure TFileInfo.Init(AStrings: TStrings; HashFunc: TTextComparerHash);
var
  idx, back, coll: Integer;
begin
  Strings := AStrings;
  Count := Strings.Count;
  SetLength(LineHash, Count + 1);
  SetLength(HashLine, 2 * Count);
  SetLength(HashColl, 2 * Count);

  if Count < 0 then
    exit;

  LineHash[Count] := HashFunc('');

  for idx := 0 to Count - 1 do
  begin
    LineHash[idx] := HashFunc(Strings[idx]);
    HashLine[idx] := -1;
    HashColl[idx] := -1;
  end;

  coll := Count - 1;
  for idx := 0 to Count - 1 do
  begin
    back := HashToRange(LineHash[idx], Count);
    if HashLine[back] < 0 then
      HashLine[back] := idx
    else
    begin
      inc(coll);
      HashLine[coll] := HashLine[back];
      HashColl[coll] := HashColl[back];
      HashLine[back] := idx;
      HashColl[back] := coll;
    end;
  end;
end;

destructor TFileInfo.Destroy;
begin
  SetLength(LineHash, 0);
  SetLength(HashLine, 0);
  SetLength(HashColl, 0);
end;

{----------------------------------------------------------------------}

procedure TCustomTextComparer.Compare(List1, List2: TStrings);
var
  HashFunc: TTextComparerHash;
  CompareFunc: TTextComparerCompare;
  Info1, Info2: TFileInfo;

  procedure AddDiff(l1a, l1e, l2a, l2e: integer);
  begin
    if (l1a >= l1e) and (l2a >= l2e) then
      exit;

    if FCount >= FAlloc then
      SetSize(FAlloc + 32);

    with FDiffs^[FCount] do
    begin
      Left.Start := l1a;
      Left.Stop := l1e;
      Right.Start := l2a;
      Right.Stop := l2e;
    end;

    inc(FCount);
  end;

  function DistanceLess(l1a, l1e, l2a, l2e: integer;
                        bp1, bp2, np1, np2: integer): boolean;
  var
    t1, b1, t2, b2: integer;
  begin
    t1 := abs((bp1 - l1a) - (bp2 - l2a));
    b1 := abs((l1e - bp1) - (l2e - bp2));
    t2 := abs((np1 - l2a) - (np2 - l2a));
    b2 := abs((l2e - np1) - (l2e - np2));
    Result := (t2 + b2) < (t1 + b1);
  end;

  function SameLines(l1p, l1e, l2p, l2e: integer): integer;
  begin
    Result := 0;
    while (l1p < l1e) and
          (l2p < l2e) and
          (Info1.LineHash[l1p] = Info2.LineHash[l2p]) and
          CompareFunc(Info1.Strings[l1p], Info2.Strings[l2p]) do
    begin
      inc(l1p);
      inc(l2p);
      inc(Result);
    end;
  end;

  procedure DiffRange(l1a, l1e, l2a, l2e: integer);
  var
    l1s, l2s, maxm, bp1, bp2, bsz, scan, idx, pos, cnt: integer;
  begin
    while true do
    begin
      l1s := l1e - l1a;
      l2s := l2e - l2a;
      if l1s > l2s then
        maxm := l2s
      else
        maxm := l1s;

      if maxm = 0 then
        break;

      bp1 := -1;
      bp2 := -1;
      bsz := 0;

      if l1s < l2s then
      begin
        // Block 1 is smaller
        scan := l1a;
        while scan < l1e - bsz do
        begin
          idx := HashToRange(Info1.LineHash[scan], Info2.Count);
          repeat
            pos := Info2.HashLine[idx];
            if pos < l2a then
              break; // <- catches also -1!

            if pos + bsz <= l2e then
            begin
              cnt := SameLines(scan, l1e, pos, l2e);

              if cnt = bsz then
                if DistanceLess(l1a, l1e, l2a, l2e, bp1, bp2, scan, pos) then
                  dec(bsz);

              if cnt > bsz then
              begin
                bsz := cnt;
                bp1 := scan;
                bp2 := pos;
              end;
            end;

            idx := Info2.HashColl[idx];
          until idx < 0;

          if bsz > FHeuristic then break;
          if bsz + bsz > maxm then break;

          inc(scan);
        end;
      end
      else
      begin
        // Block 2 is smaller
        scan := l2a;
        while scan < l2e - bsz do
        begin
          idx := HashToRange(Info2.LineHash[scan], Info1.Count);
          repeat
            pos := Info1.HashLine[idx];
            if pos < l1a then
              break; // <- catches also -1!

            if pos + bsz <= l1e then
            begin
              cnt := SameLines(pos, l1e, scan, l2e);

              if cnt = bsz then
                if DistanceLess(l1a, l1e, l2a, l2e, bp1, bp2, pos, scan) then
                  dec(bsz);

              if cnt > bsz then
              begin
                bsz := cnt;
                bp1 := pos;
                bp2 := scan;
              end;
            end;

            idx := Info1.HashColl[idx];
          until idx < 0;

          if bsz > FHeuristic then break;
          if bsz + bsz > maxm then break;

          inc(scan);
        end;
      end;

      if bsz = 0 then
        break;

      // This way the diffs are always sorted correctly
      {
      if (bp1 - l1a) + (bp2 - l2a) < (l1e - bp1 - bsz) + (l2e - bp2 - bsz) then
      begin
      }
        DiffRange(l1a, bp1, l2a, bp2);
        l1a := bp1 + bsz;
        l2a := bp2 + bsz;
      {
      end
      else
      begin
        DiffRange(bp1 + bsz, l1e, bp2 + bsz, l2e);
        l1e := bp1;
        l2e := bp2;
      end;
      }
    end;

    AddDiff(l1a, l1e, l2a, l2e);
  end;

begin
  Clear;

  if Assigned(FOnHash) and Assigned(FOnCompare) then
  begin
    HashFunc := FOnHash;
    CompareFunc := FOnCompare;
  end
  else
  begin
    HashFunc := DefaultHash;
    CompareFunc := DefaultCompare;
  end;

  Info1 := TFileInfo.Create;
  Info2 := TFileInfo.Create;
  try
    Info1.Init(List1, HashFunc);
    Info2.Init(List2, HashFunc);
    DiffRange(0, Info1.Count, 0, Info2.Count);
  finally
    Info2.Free;
    Info1.Free;
  end;
end;

{----------------------------------------------------------------------}

function RemoveSpaces(const s: string): string;
var
  len, p: integer;
  pc: PChar;
begin
  len := Length(s);
  SetLength(Result, len);

  pc := @s[1];
  p := 0;
  while len > 0 do
  begin
    if not (pc^ in [#0, #9, #10, #13, #32]) then
    begin
      inc(p);
      Result[p] := pc^;
    end;

    inc(pc);
    dec(len);
  end;

  SetLength(Result, p);
end;

function ReduceSpaces(const s: string): string;
var
  len, p, sp: integer;
  pc: PChar;
begin
  len := Length(s);
  SetLength(Result, len);

  pc := @s[1];
  p := 0;
  sp := 0;
  while len > 0 do
  begin
    if pc^ in [#0, #9, #10, #13, #32] then
      inc(sp, p)
    else
    begin
      if sp > 0 then
      begin
        sp := 0;
        inc(p);
        Result[p] := ' ';
      end;

      inc(p);
      Result[p] := pc^;
    end;

    inc(pc);
    dec(len);
  end;

  SetLength(Result, p);
end;

{----------------------------------------------------------------------}

function TStandardTextComparer.Hash_ynn(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(Lowercase(s));
end;

function TStandardTextComparer.Hash_nyn(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(RemoveSpaces(s));
end;

function TStandardTextComparer.Hash_yyn(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(Lowercase(RemoveSpaces(s)));
end;

function TStandardTextComparer.Hash_nny(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(ReduceSpaces(s));
end;

function TStandardTextComparer.Hash_yny(const s: string): TDiffHashValue;
begin
  Result := DefaultHash(Lowercase(ReduceSpaces(s)));
end;

function TStandardTextComparer.Compare_ynn(const s1, s2: string): boolean;
begin
  Result := Lowercase(s1) = Lowercase(s2);
end;

function TStandardTextComparer.Compare_nyn(const s1, s2: string): boolean;
begin
  Result := RemoveSpaces(s1) = RemoveSpaces(s2);
end;

function TStandardTextComparer.Compare_yyn(const s1, s2: string): boolean;
begin
  Result := Lowercase(RemoveSpaces(s1)) = Lowercase(RemoveSpaces(s2));
end;

function TStandardTextComparer.Compare_nny(const s1, s2: string): boolean;
begin
  Result := ReduceSpaces(s1) = ReduceSpaces(s2);
end;

function TStandardTextComparer.Compare_yny(const s1, s2: string): boolean;
begin
  Result := Lowercase(ReduceSpaces(s1)) = Lowercase(ReduceSpaces(s2));
end;

procedure TStandardTextComparer.Compare(List1, List2: TStrings);
var
  HashFunc: TTextComparerHash;
  CompareFunc: TTextComparerCompare;
begin
  HashFunc := FOnHash;
  CompareFunc := FOnCompare;

  try
    case 4 * Ord(FIgnoreCase) + 2 * Ord(FIgnoreSpaces) + Ord(FSpacesAsOne) of
      0 + 0 + 0: begin
        FOnHash := nil;
        FOnCompare := nil;
      end;
      0 + 0 + 1: begin
        FOnHash := Hash_nny;
        FOnCompare := Compare_nny;
      end;
      0 + 2 + 0,
      0 + 2 + 1: begin
        FOnHash := Hash_nyn;
        FOnCompare := Compare_nyn;
      end;
      4 + 0 + 0: begin
        FOnHash := Hash_ynn;
        FOnCompare := Compare_ynn;
      end;
      4 + 0 + 1: begin
        FOnHash := Hash_yny;
        FOnCompare := Compare_yny;
      end;
      4 + 2 + 0,
      4 + 2 + 1: begin
        FOnHash := Hash_yyn;
        FOnCompare := Compare_yyn;
      end;
    end;

    inherited Compare(List1, List2);
  finally
    FOnHash := HashFunc;
    FOnCompare := CompareFunc;
  end;
end;

end.
Flocke's Garage
Valid HTML 4.01 Transitional Valid CSS!
(C) 2005-2018 Volker Siebert.
Creative Commons-LizenzvertragDer gesamte Inhalt dieser Webseite steht unter einer Creative Commons-Lizenz (sofern nicht anders angegeben).