Pascal Diff, Version 0.2 (alt)
Hinweis: dies ist nicht die neueste Version!
Datei: diff.pas
{ Diff.pas Copyright (C) 2005 Volker Siebert, Germany All rights reserved. DESCRIPTION This unit for Delphi 5, 6, 7, and 2005 contains code to compare to text documents stored in TString objects line by line in the way the classical unix diff program works. Sample usage: Diff := TTextComparer.Create; try Diff.Compare(Strings1, Strings2); Label1.Caption := Format('There are %d differences', [Diff.Count]); // Process Diff.Diff[x].(Left|Right).(Start.Stop) finally Diff.Free; end; TTextComparer uses a hash on all lines to be able to compare them fast. If you provide an own compare function (i.e. not case sensitive), then you also have to provide an own hash function. The only thing the hash function must ensure is that if two strings match by your custom compare function, i.e. "MyOwnCompareFunction(s1, s2) = true", then both strings must have the same hash using your custom hash function, i.e., "MyOwnHashFunction(s1) = MyOwnHashFunction(s2)". Note: it will also work if you just a provide 0 as every hash value, but it will slow down the algorithm. LICENSE The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.mozilla.org/MPL/ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. CHANGELOG 2005-07-29: Version 0.2 - Added TStandardTextComparer 2005-07-29: Version 0.1 - Initial version } unit Diff; interface uses SysUtils, Classes; type TDiffHashValue = Cardinal; TRangeOfLines = record Start: integer; // including Stop: integer; // excluding end; PDifference = ^TDifference; TDifference = record Left: TRangeOfLines; // left file (first one) Right: TRangeOfLines; // right file end; PDifferenceArray = ^TDifferenceArray; TDifferenceArray = array [0 .. MaxInt div 32] of TDifference; ETextComparerError = class(Exception); TTextComparerHash = function(const s: string): TDiffHashValue of object; register; TTextComparerCompare = function(const s1, s2: string): boolean of object; register; TCustomTextComparer = class private FDiffs: PDifferenceArray; FCount: integer; FAlloc: integer; FHeuristic: integer; FOnHash: TTextComparerHash; FOnCompare: TTextComparerCompare; function GetDiff(Index: integer): TDifference; procedure SetSize(NewCount: integer); protected function DefaultHash(const s: string): TDiffHashValue; register; function DefaultCompare(const s1, s2: string): boolean; register; property OnHash: TTextComparerHash read FOnHash write FOnHash; property OnCompare: TTextComparerCompare read FOnCompare write FOnCompare; public constructor Create; destructor Destroy; override; procedure Clear; procedure Compare(List1, List2: TStrings); property Count: integer read FCount; property Diff[Index: integer]: TDifference read GetDiff; default; property Heuristic: integer read FHeuristic write FHeuristic; end; TTextComparer = class(TCustomTextComparer) public property OnHash; property OnCompare; end; TStandardTextComparer = class(TCustomTextComparer) private FIgnoreCase: boolean; FIgnoreSpaces: boolean; FSpacesAsOne: boolean; protected function Hash_ynn(const s: string): TDiffHashValue; register; function Hash_nyn(const s: string): TDiffHashValue; register; function Hash_yyn(const s: string): TDiffHashValue; register; function Hash_nny(const s: string): TDiffHashValue; register; function Hash_yny(const s: string): TDiffHashValue; register; function Compare_ynn(const s1, s2: string): boolean; register; function Compare_nyn(const s1, s2: string): boolean; register; function Compare_yyn(const s1, s2: string): boolean; register; function Compare_nny(const s1, s2: string): boolean; register; function Compare_yny(const s1, s2: string): boolean; register; public procedure Compare(List1, List2: TStrings); property IgnoreCase: boolean read FIgnoreCase write FIgnoreCase; property IgnoreSpaces: boolean read FIgnoreSpaces write FIgnoreSpaces; property SpacesAsOne: boolean read FSpacesAsOne write FSpacesAsOne; end; implementation constructor TCustomTextComparer.Create; begin inherited; FDiffs := nil; FCount := 0; FAlloc := 0; FHeuristic := 40; end; destructor TCustomTextComparer.Destroy; begin Clear; inherited; end; procedure TCustomTextComparer.SetSize(NewCount: integer); begin FAlloc := NewCount; ReAllocMem(FDiffs, FAlloc * SizeOf(TDifference)); if FCount > FAlloc then FCount := FAlloc; end; procedure TCustomTextComparer.Clear; begin SetSize(0); end; function TCustomTextComparer.GetDiff(Index: integer): TDifference; begin if (Index < 0) or (Index >= FCount) then raise ETextComparerError.Create('Invalid item index'); Result := FDiffs^[Index]; end; function TCustomTextComparer.DefaultHash(const s: string): TDiffHashValue; var k, len: integer; begin Result := 0; len := Length(s); for k := 1 to len do Result := 19 * Result + Ord(s[k]); end; function TCustomTextComparer.DefaultCompare(const s1, s2: string): boolean; begin Result := s1 = s2; end; {----------------------------------------------------------------------} function HashToRange(Hash: TDiffHashValue; Max: integer): integer; inline; begin Result := Integer(Hash) mod Max; if Result < 0 then inc(Result, Max); end; type TFileInfo = class public Strings: TStrings; Count: integer; LineHash: array of TDiffHashValue; // [0 .. Count] HashLine: array of integer; // [0 .. 2 * Count - 1] HashColl: array of integer; // [0 .. 2 * Count - 1] procedure Init(AStrings: TStrings; HashFunc: TTextComparerHash); destructor Destroy; override; end; procedure TFileInfo.Init(AStrings: TStrings; HashFunc: TTextComparerHash); var idx, back, coll: Integer; begin Strings := AStrings; Count := Strings.Count; SetLength(LineHash, Count + 1); SetLength(HashLine, 2 * Count); SetLength(HashColl, 2 * Count); if Count < 0 then exit; LineHash[Count] := HashFunc(''); for idx := 0 to Count - 1 do begin LineHash[idx] := HashFunc(Strings[idx]); HashLine[idx] := -1; HashColl[idx] := -1; end; coll := Count - 1; for idx := 0 to Count - 1 do begin back := HashToRange(LineHash[idx], Count); if HashLine[back] < 0 then HashLine[back] := idx else begin inc(coll); HashLine[coll] := HashLine[back]; HashColl[coll] := HashColl[back]; HashLine[back] := idx; HashColl[back] := coll; end; end; end; destructor TFileInfo.Destroy; begin SetLength(LineHash, 0); SetLength(HashLine, 0); SetLength(HashColl, 0); end; {----------------------------------------------------------------------} procedure TCustomTextComparer.Compare(List1, List2: TStrings); var HashFunc: TTextComparerHash; CompareFunc: TTextComparerCompare; Info1, Info2: TFileInfo; procedure AddDiff(l1a, l1e, l2a, l2e: integer); begin if (l1a >= l1e) and (l2a >= l2e) then exit; if FCount >= FAlloc then SetSize(FAlloc + 32); with FDiffs^[FCount] do begin Left.Start := l1a; Left.Stop := l1e; Right.Start := l2a; Right.Stop := l2e; end; inc(FCount); end; function DistanceLess(l1a, l1e, l2a, l2e: integer; bp1, bp2, np1, np2: integer): boolean; var t1, b1, t2, b2: integer; begin t1 := abs((bp1 - l1a) - (bp2 - l2a)); b1 := abs((l1e - bp1) - (l2e - bp2)); t2 := abs((np1 - l2a) - (np2 - l2a)); b2 := abs((l2e - np1) - (l2e - np2)); Result := (t2 + b2) < (t1 + b1); end; function SameLines(l1p, l1e, l2p, l2e: integer): integer; begin Result := 0; while (l1p < l1e) and (l2p < l2e) and (Info1.LineHash[l1p] = Info2.LineHash[l2p]) and CompareFunc(Info1.Strings[l1p], Info2.Strings[l2p]) do begin inc(l1p); inc(l2p); inc(Result); end; end; procedure DiffRange(l1a, l1e, l2a, l2e: integer); var l1s, l2s, maxm, bp1, bp2, bsz, scan, idx, pos, cnt: integer; begin while true do begin l1s := l1e - l1a; l2s := l2e - l2a; if l1s > l2s then maxm := l2s else maxm := l1s; if maxm = 0 then break; bp1 := -1; bp2 := -1; bsz := 0; if l1s < l2s then begin // Block 1 is smaller scan := l1a; while scan < l1e - bsz do begin idx := HashToRange(Info1.LineHash[scan], Info2.Count); repeat pos := Info2.HashLine[idx]; if pos < l2a then break; // <- catches also -1! if pos + bsz <= l2e then begin cnt := SameLines(scan, l1e, pos, l2e); if cnt = bsz then if DistanceLess(l1a, l1e, l2a, l2e, bp1, bp2, scan, pos) then dec(bsz); if cnt > bsz then begin bsz := cnt; bp1 := scan; bp2 := pos; end; end; idx := Info2.HashColl[idx]; until idx < 0; if bsz > FHeuristic then break; if bsz + bsz > maxm then break; inc(scan); end; end else begin // Block 2 is smaller scan := l2a; while scan < l2e - bsz do begin idx := HashToRange(Info2.LineHash[scan], Info1.Count); repeat pos := Info1.HashLine[idx]; if pos < l1a then break; // <- catches also -1! if pos + bsz <= l1e then begin cnt := SameLines(pos, l1e, scan, l2e); if cnt = bsz then if DistanceLess(l1a, l1e, l2a, l2e, bp1, bp2, pos, scan) then dec(bsz); if cnt > bsz then begin bsz := cnt; bp1 := pos; bp2 := scan; end; end; idx := Info1.HashColl[idx]; until idx < 0; if bsz > FHeuristic then break; if bsz + bsz > maxm then break; inc(scan); end; end; if bsz = 0 then break; // This way the diffs are always sorted correctly { if (bp1 - l1a) + (bp2 - l2a) < (l1e - bp1 - bsz) + (l2e - bp2 - bsz) then begin } DiffRange(l1a, bp1, l2a, bp2); l1a := bp1 + bsz; l2a := bp2 + bsz; { end else begin DiffRange(bp1 + bsz, l1e, bp2 + bsz, l2e); l1e := bp1; l2e := bp2; end; } end; AddDiff(l1a, l1e, l2a, l2e); end; begin Clear; if Assigned(FOnHash) and Assigned(FOnCompare) then begin HashFunc := FOnHash; CompareFunc := FOnCompare; end else begin HashFunc := DefaultHash; CompareFunc := DefaultCompare; end; Info1 := TFileInfo.Create; Info2 := TFileInfo.Create; try Info1.Init(List1, HashFunc); Info2.Init(List2, HashFunc); DiffRange(0, Info1.Count, 0, Info2.Count); finally Info2.Free; Info1.Free; end; end; {----------------------------------------------------------------------} function RemoveSpaces(const s: string): string; var len, p: integer; pc: PChar; begin len := Length(s); SetLength(Result, len); pc := @s[1]; p := 0; while len > 0 do begin if not (pc^ in [#0, #9, #10, #13, #32]) then begin inc(p); Result[p] := pc^; end; inc(pc); dec(len); end; SetLength(Result, p); end; function ReduceSpaces(const s: string): string; var len, p, sp: integer; pc: PChar; begin len := Length(s); SetLength(Result, len); pc := @s[1]; p := 0; sp := 0; while len > 0 do begin if pc^ in [#0, #9, #10, #13, #32] then inc(sp, p) else begin if sp > 0 then begin sp := 0; inc(p); Result[p] := ' '; end; inc(p); Result[p] := pc^; end; inc(pc); dec(len); end; SetLength(Result, p); end; {----------------------------------------------------------------------} function TStandardTextComparer.Hash_ynn(const s: string): TDiffHashValue; begin Result := DefaultHash(Lowercase(s)); end; function TStandardTextComparer.Hash_nyn(const s: string): TDiffHashValue; begin Result := DefaultHash(RemoveSpaces(s)); end; function TStandardTextComparer.Hash_yyn(const s: string): TDiffHashValue; begin Result := DefaultHash(Lowercase(RemoveSpaces(s))); end; function TStandardTextComparer.Hash_nny(const s: string): TDiffHashValue; begin Result := DefaultHash(ReduceSpaces(s)); end; function TStandardTextComparer.Hash_yny(const s: string): TDiffHashValue; begin Result := DefaultHash(Lowercase(ReduceSpaces(s))); end; function TStandardTextComparer.Compare_ynn(const s1, s2: string): boolean; begin Result := Lowercase(s1) = Lowercase(s2); end; function TStandardTextComparer.Compare_nyn(const s1, s2: string): boolean; begin Result := RemoveSpaces(s1) = RemoveSpaces(s2); end; function TStandardTextComparer.Compare_yyn(const s1, s2: string): boolean; begin Result := Lowercase(RemoveSpaces(s1)) = Lowercase(RemoveSpaces(s2)); end; function TStandardTextComparer.Compare_nny(const s1, s2: string): boolean; begin Result := ReduceSpaces(s1) = ReduceSpaces(s2); end; function TStandardTextComparer.Compare_yny(const s1, s2: string): boolean; begin Result := Lowercase(ReduceSpaces(s1)) = Lowercase(ReduceSpaces(s2)); end; procedure TStandardTextComparer.Compare(List1, List2: TStrings); var HashFunc: TTextComparerHash; CompareFunc: TTextComparerCompare; begin HashFunc := FOnHash; CompareFunc := FOnCompare; try case 4 * Ord(FIgnoreCase) + 2 * Ord(FIgnoreSpaces) + Ord(FSpacesAsOne) of 0 + 0 + 0: begin FOnHash := nil; FOnCompare := nil; end; 0 + 0 + 1: begin FOnHash := Hash_nny; FOnCompare := Compare_nny; end; 0 + 2 + 0, 0 + 2 + 1: begin FOnHash := Hash_nyn; FOnCompare := Compare_nyn; end; 4 + 0 + 0: begin FOnHash := Hash_ynn; FOnCompare := Compare_ynn; end; 4 + 0 + 1: begin FOnHash := Hash_yny; FOnCompare := Compare_yny; end; 4 + 2 + 0, 4 + 2 + 1: begin FOnHash := Hash_yyn; FOnCompare := Compare_yyn; end; end; inherited Compare(List1, List2); finally FOnHash := HashFunc; FOnCompare := CompareFunc; end; end; end. |