unit udf_levenshtein;

interface

{Nicholas Sherlock, 2006.

 Example of a UDF: Levenshtein string distance.
 After copying the dll to your MySql/bin folder, create with:
 CREATE FUNCTION Levenshtein RETURNS INT SONAME 'LevenshteinUDF.dll';}

uses SysUtils, mysql_udf;

function stringdistance(const s, t: string): integer; overload;
function stringdistance(const s, t: string; maxdist: integer; out dist: integer): boolean; overload;

implementation

type
  TIntArray = array[0..maxint div 4 - 1] of integer; {Avoid errors if range check on}
  pintarray = ^TIntArray;

function Levenshtein_init(var init: UDF_INIT; var _args: UDF_ARGS; message: pChar): my_bool; cdecl;
var args: TUDF_Args;
begin
  result := 0;
  args := TUDF_Args.create(@_args);
  try
    if (not (args.Count in [2..3])) or ((args.count = 3) and (args[2].ArgType <> INT_RESULT)) then begin
      strcopy(message, 'Levenshtein() usage: Levenshtein(string1, string2 [,maxdist])');
      result := 1; //fail
      exit;
    end;

    args[0].ArgType := STRING_RESULT;
    args[1].ArgType := STRING_RESULT; //Mysql will now coerce these args to string for us

    if args.count = 3 then
      init.maybe_null := 1 else //since we could exceed maxdist
      init.maybe_null := 0;

  finally
    args.free;
  end;
end;

procedure Levenshtein_deinit(var init: UDF_INIT); cdecl;
begin
 {Do nothing. We could actually omit this function, MySql won't
  try to call it if it doesn't exist}
end;

function Levenshtein(var init: UDF_INIT; var _args: UDF_ARGS; var is_null, error: byte): int64; cdecl;
var args: TUDF_Args;
  dist: integer;
begin
  error := 0;
  is_null := 0;
  args := TUDF_Args.create(@_args);
  try
    if args.Count >= 3 then begin
      if not stringdistance(args[0].DataString, args[1].DataString, args[2].DataInteger, dist) then begin
        result := 0;
        is_null := 1;
      end else //didn't fall inside maximum distance
        result := dist;
    end else
      result := stringdistance(args[0].DataString, args[1].DataString);
  finally
    args.free;
  end;
end;

function min3(a, b, c: integer): integer;
begin
  if (a <= b) and (a <= c) then begin
    result := a;
    exit;
  end;
  if (b <= a) and (b <= c) then begin
    result := b;
    Exit;
  end;
  result := c;
end;

{Find the distance between the two strings s and t. We are only interested in
 the distance if the distance is less than or equal to maxdist. The result is
 true if the distance has been found (Distance stored in dist), it may be greater
 than maxdist. If the result is false, then the distance has not been found, but
 it is greater than maxdist}

function stringdistance(const s, t: string; maxdist: integer; out dist: integer): boolean;
const
  cost_ins = 1;
  cost_del = 1;
  cost_sub = 1;
var n, m: integer;
  p, q, r: PIntArray;
  smallest, cost, i, j: integer;
begin
  result := false; //Didn't meet max dist requirement

  m := length(s);
  n := length(t);

  if abs(m - n) > maxdist then
    exit; //dist is always at least the difference between lengths of args

  if n = 0 then begin
    dist := m;
    result := true;
    exit;
  end else
    if m = 0 then begin
      dist := n;
      result := true;
      exit;
    end;

  getmem(p, (n + 1) * SizeOf(integer)); {p is first row}
  getmem(q, (n + 1) * SizeOf(integer)); {q is second row}

  p[0] := 0;
  for i := 1 to n do
    p[i] := p[i - 1] + cost_ins; {fill first row}

  for i := 1 to m do begin
    q[0] := p[0] + cost_del; //cost del
    smallest := q[0];
    for j := 1 to n do begin
      if s[i] = t[j] then cost := 0 else cost := cost_sub;
      q[j] := min3(p[j] + cost_del,
        q[j - 1] + cost_ins,
        p[j - 1] + cost {cost subst}
        );
        if q[j]<smallest then
        smallest:=q[j];
    end;
    if smallest > maxdist then
      exit; //exceeded maxdist

    r := p; {Swap over the two rows}
    p := q;
    q := r;
  end;

  dist := p[n];
  result := true;

  FreeMem(p);
  FreeMem(q);
end;

function stringdistance(const s, t: string): integer;
const
  cost_ins = 1;
  cost_del = 1;
  cost_sub = 1;
var n, m: integer;
  p, q, r: PIntArray;
  cost, i, j: integer;
begin
  m := length(s);
  n := length(t);

  if n = 0 then begin
    result := m;
    exit;
  end else
    if m = 0 then begin
      result := n;
      exit;
    end;

  getmem(p, (n + 1) * SizeOf(integer)); {p is first row}
  getmem(q, (n + 1) * SizeOf(integer)); {q is second row}

  p[0] := 0;
  for i := 1 to n do
    p[i] := p[i - 1] + cost_ins; {fill first row}

  for i := 1 to m do begin
    q[0] := p[0] + cost_del;
    for j := 1 to n do begin
      if s[i] = t[j] then cost := 0 else cost := cost_sub;
      q[j] := min3(p[j] + cost_del, {cost del}
        q[j - 1] + cost_ins, {cost ins}
        p[j - 1] + cost {cost subst}
        );
    end;

    r := p; {Swap over the two rows}
    p := q;
    q := r;
  end;

  result := p[n];

  FreeMem(p);
  FreeMem(q);
end;

exports
  Levenshtein_init,
  Levenshtein,
  Levenshtein_deinit;

end.

