unit CdeclClosure;
// Rob Kennedy
// 11 July 2005
// http://www.cs.wisc.edu/~rkennedy/

// The code in this unit allows for the use of methods in the place of callback
// functions that use the cdecl calling convention. Call MakeCdeclInstance on
// the method you want to use, and then use the result as a function pointer.
// Call FreeCdeclInstance when you are finished with the method.

// The method to be wrapped must be a stdcall method, NOT cdecl. (It's too much
// work to generate code to call a cdecl method from within a cdecl function.)

// This code is loosely based on Borland's implementation of MakeObjectInstance
// in Classes.pas.

interface

uses
  Classes;

// Pass the address of the method to be wrapped, along with a number specifying
// how many bytes of parameters the function takes. In general, figure four
// bytes per argument, even for parameter types that aren't that large.
function MakeCdeclInstance(const StdcallMethod: TMethod; const ArgSize: Word): Pointer;
procedure FreeCdeclInstance(CdeclProc: Pointer);

implementation

uses
  Windows, SysUtils;

var
  MinReserveSize: DWord;
  PageSize: DWord;
  MaxPagesPerBlock: Cardinal;
  ProcsPerPage: Cardinal;
  Lock: TRTLCriticalSection;

type
  TCallInstruction = packed record
    Opcode: Byte;
    Offset: LongInt;
  end;

  PCallData = ^TCallData;
  TCallData = packed record
    Method: TMethod;
    ArgSize: Word;
  end;

  PCdeclProc = ^TCdeclProc;
  TCdeclProc = packed record
    Code: TCallInstruction;
  case Boolean of
    False: (Next: PCdeclProc);
    True: (
      Data: TCallData;
      // This padding is here in the interest of making the
      // code in this structure be aligned on a 16-byte
      // bounary, which should allow it to run a little better.
      Padding: array[1..1] of Byte;
    );
  end;

  PPage = ^TPage;
  TPage = packed record
    PreviousPage: PPage;
    PageIndex: Cardinal;
    Procs: array[0..0] of TCdeclProc;
  end;

procedure InitGlobals;
var
  SystemInfo: TSystemInfo;
begin
  GetSystemInfo(SystemInfo);
  MinReserveSize := SystemInfo.dwAllocationGranularity;
  PageSize := SystemInfo.dwPageSize;
  MaxPagesPerBlock := MinReserveSize div PageSize;
  Assert(MinReserveSize mod PageSize = 0);
  Assert(SizeOf(TCdeclProc) mod 16 = 0);
  ProcsPerPage := Succ((PageSize - SizeOf(TPage)) div SizeOf(TCdeclProc));
end;

function CopyArgs(const Source, Dest: Pointer; const CallData: PCallData): PCallData;
begin
  // We add SizeOf(Pointer) here because we want to copy the return address as
  // well as the arguments. This keeps us from having to save the old return
  // address while the wrapped function is executing, letting us use a JMP
  // instead of a CALL at the end of CdeclStub.
  Move(Source^, Dest^, CallData.ArgSize + SizeOf(Pointer));
  Result := CallData;
end;

function CalcJmpOffset(const Src, Dest: Pointer): LongInt;
begin
  Result := Longint(Dest) - (Longint(Src) + SizeOf(TCallInstruction));
end;

procedure CdeclStub;//(const Data: PCallData); stdcall;
// The signature for this function is commented out because if we declared it as
// a stdcall function (which it is), then Delphi would add its prologue code to
// it, and we don't want that code. Delphi doesn't add a prologue to simple
// register procedures.
asm
  // ECX <- Data
  pop ecx
  mov eax, esp
  // Allocate stack space for copy of args
  movzx edx, [ecx].TCallData.ArgSize
  sub esp, edx
  mov edx, esp
  // EAX points to the return address, and beyond it are the parameters.
  // EDX points to the space that will hold a copy of the parameters.
  // The two regions actually overlap by 4 bytes; see explanation in CopyArgs.
  call CopyArgs
  // Return address is on top of stack. We need an additional argument beneath
  // that value.
  mov edx, [eax].TCallData.Method.Data // Self
  pop ecx                              // return address
  push edx                             // Self
  push ecx                             // return address
  // Transfer control to the wrapped method
  jmp [eax].TCallData.Method.Code
  // When the wrapped method returns, it will go back to the original caller,
  // NOT here. While returning, the method will remove ArgSize bytes from the
  // stack because that's what stdcall methods do. Since the caller called this
  // stub as a cdecl function, though, it will also remove ArgSize bytes from
  // the stack. That's why the code above makes a _copy_ of the arguments.
end;

var
  FirstFreeProc: PCdeclProc = nil;
  MostRecentPage: PPage = nil;

procedure AllocateNewPage;
var
  Proc: PCdeclProc;
  NewPage: PPage;
  i: Cardinal;
begin
  if not Assigned(MostRecentPage) then InitGlobals;
  if not Assigned(MostRecentPage) or (MostRecentPage.PageIndex >= Pred(MaxPagesPerBlock)) then begin
    // Nothing has ever been allocated, or we need to allocate a whole new block
    NewPage := VirtualAlloc(nil, MinReserveSize, Mem_Reserve, Page_NoAccess);
    Win32Check(Assigned(NewPage));
    NewPage := VirtualAlloc(NewPage, PageSize, Mem_Commit, Page_Execute_ReadWrite);
    Win32Check(Assigned(NewPage));
    NewPage.PageIndex := 0;
  end else begin
    // Get the next page from the reserved space
    NewPage := VirtualAlloc(PAnsiChar(MostRecentPage) + PageSize, PageSize, Mem_Commit, Page_Execute_ReadWrite);
    Win32Check(Assigned(NewPage));
    NewPage.PageIndex := Succ(MostRecentPage.PageIndex);
  end;
  NewPage.PreviousPage := MostRecentPage;
  MostRecentPage := NewPage;

  // Control reaches this Code value via one of the Proc items, which has
  // placed on the stack a pointer to the method that should be called. This
  // code then jumps to the CdeclStub procedure.
  //NewPage.Code.Jmp.Opcode := $e9; // jmp
  //NewPage.Code.Jmp.Offset := CalcJmpOffset(@NewPage.Code.Jmp, @CdeclStub);
  Proc := @NewPage.Procs[0];
  for i := 1 to ProcsPerPage do begin
    // The CALL instruction does two things, both of which are important in
    // this situation. It jumps the specified number of bytes, which is a
    // negative number, in this case, to the instruction in NewPage.Code.
    // But before it jumps there, it pushes the address of the NEXT
    // instruction onto the stack. In this case, there is no next
    // instruction. Instead, at that address is the Proc.Method value.
    Proc.Code.Opcode := $e8; // call near ptr Offset
    Proc.Code.Offset := CalcJmpOffset(@Proc.Code, @CdeclStub);//NewPage.Code);
    Proc.Next := FirstFreeProc;
    FirstFreeProc := Proc;
    Inc(Proc);
  end;
end;

function MakeCdeclInstance(const StdcallMethod: TMethod; const ArgSize: Word): Pointer;
var
  Proc: PCdeclProc absolute Result;
begin
  if IsMultithread then EnterCriticalSection(Lock);
  try
    if not Assigned(FirstFreeProc) then
      // No unused procedures remain. Allocate another page of them.
      AllocateNewPage;
    Assert(Assigned(FirstFreeProc));
    Proc := FirstFreeProc;
    FirstFreeProc := Proc.Next;
  finally
    if IsMultithread then LeaveCriticalSection(Lock);
  end;
  Proc.Data.Method := StdcallMethod;
  Proc.Data.ArgSize := ArgSize;
end;

procedure FreeCdeclInstance(CdeclProc: Pointer);
begin
  if not Assigned(CdeclProc) then exit;
  if IsMultithread then EnterCriticalSection(Lock);
  try
    PCdeclProc(CdeclProc).Next := FirstFreeProc;
    FirstFreeProc := CdeclProc;
  finally
    if IsMultithread then LeaveCriticalSection(Lock);
  end;
end;

function GetPageReservationBase(Page: PPage): PPage;
begin
  Result := Ptr(LongWord(Page) - (LongWord(Page) mod MinReserveSize));
end;

procedure FreeProcBlocks;
var
  Page: PPage;
begin
  MostRecentPage := GetPageReservationBase(MostRecentPage);
  while Assigned(MostRecentPage) do begin
    Page := MostRecentPage.PreviousPage;
    VirtualFree(MostRecentPage, 0, Mem_Release);
    MostRecentPage := GetPageReservationBase(Page);
  end;
end;

initialization
  InitializeCriticalSection(Lock);
finalization
  FreeProcBlocks;
  DeleteCriticalSection(Lock);
end.