Home Using FPC .o files in Delphi 2010
Reply: 1

Using FPC .o files in Delphi 2010

mrabat
1#
mrabat Published in 2018-02-09 21:22:28Z

I've written quite a big library for matrix operations for Delphi and FPC. There exists now an extension for this library for the Intel AVX extension but I could only manage to get that compiled in FPC. My idea was to create .o files in FPC which contains the AVX assembler codes and include these files in Delphi. I tried to follow this question here: Linking FPC .o files into Delphi

but without success. I was able to dump the function names and tried to import these in the Delphi unit. The problem is that I always get an error saying that the .o files is in the wrong format.

I use CodeTyphoon for compilation which internally uses FPC 3.1.1 and Delphi2010 as a first try.

The code is once compiled in FPC and one time in Delphi using the approriate ifdefs.

My base code looks like this (just an excerpt):

// ###################################################################
// #### This file is part of the mathematics library project, and is
// #### offered under the licence agreement described on
// #### http://www.mrsoft.org/
// ####
// #### Copyright:(c) 2011, Michael R. . All rights reserved.
// ####
// #### Unless required by applicable law or agreed to in writing, software
// #### distributed under the License is distributed on an "AS IS" BASIS,
// #### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// #### See the License for the specific language governing permissions and
// #### limitations under the License.
// ###################################################################


unit AVXMatrixMultOperations;

interface

{$IFDEF CPUX64}
{$DEFINE x64}
{$ENDIF}
{$IFDEF cpux86_64}
{$DEFINE x64}
{$ENDIF}
{$IFNDEF x64}

uses MatrixConst;

{$IFNDEF FPC}
// this fails -> wrong object format
{$L '.\AVXPrecompiled\win32\AVXMatrixMultOperations.o'}
{$ENDIF}

// full matrix operations
procedure AVXMatrixMultAligned(dest : PDouble; const destLineWidth : TASMNativeInt; mt1, mt2 : PDouble; width1, height1, width2, height2 : TASMNativeInt; const LineWidth1, LineWidth2 : TASMNativeInt);
{$IFNDEF FPC} external '' name 'AVXMATRIXMULTOPERATIONS_$$_AVXMATRIXMULTALIGNED$crc2A67AB04'; {$ENDIF}

{$ENDIF}

implementation

{$IFDEF FPC} {$ASMMODE intel} {$ENDIF}

{$IFNDEF x64}

{$IFDEF FPC}

procedure AVXMatrixMultAligned(dest : PDouble; const destLineWidth : TASMNativeInt; mt1, mt2 : PDouble; width1, height1, width2, height2 : TASMNativeInt; const LineWidth1, LineWidth2 : TASMNativeInt);
var bytesWidth2, destOffset : TASMNativeInt;
    iter : TASMNativeInt;
{$IFDEF FPC}
begin
{$ENDIF}
asm
   // prolog - simulate stack
   push ebx;
   push edi;
   push esi;

   mov ecx, dest;

   mov edi, width1;
   imul edi, -8;
   mov iter, edi;

   sub mt1, edi;

   //destOffset := destLineWidth - Width2*sizeof(double);
   mov ebx, Width2;
   shl ebx, 3;
   mov eax, destLineWidth;
   sub eax, ebx;
   mov destOffset, eax;

   //bytesWidth2 := width2*sizeof(double);
   mov bytesWidth2, ebx;

   // for y := 0 to height1 - 1 do
   @@foryloop:

      // r12 -> counter to width2
      mov esi, width2;
      sub esi, 2;
      jl @LastXColumn;

      @@forxloop:
      // for x := 0 to width2 div 2 - 1
          // esi: mt1 - width1*sizeof(double)
          // mt2: mt2
          mov edx, mt1;
          mov ebx, mt2;
          mov eax, iter;
          mov edi, LineWidth2;

          vxorpd ymm0, ymm0, ymm0;
          vxorpd ymm1, ymm1, ymm1;

          cmp eax, -32;
          jg @@Innerloop2Begin;

          // for z := 0 to width1 - 1do
          // AVX part:
          @@InnerLoop1:
             // 4x4 block
             vmovapd xmm2, [ebx];
             add ebx, edi;
             vmovapd xmm4, xmm2;

             vmovapd xmm3, [ebx];
             add ebx, edi;

             // shuffle so we can multiply

             // swap such that we can immediately multiply
             vmovlhps xmm2, xmm2, xmm3;
             vmovhlps xmm3, xmm3, xmm4;

             // next 4 elements
             vmovapd xmm4, [ebx];
             add ebx, edi;
             vmovapd xmm6, xmm4;

             vmovapd xmm5, [ebx];
             add ebx, edi;

             vmovapd ymm7, [edx + eax]

             vmovlhps xmm4, xmm4, xmm5;
             vmovhlps xmm5, xmm5, xmm6;

             vinsertf128 ymm2, ymm2, xmm4, 1;
             vinsertf128 ymm3, ymm3, xmm5, 1;

             // now multiply and add
             vmulpd ymm2, ymm2, ymm7;
             vmulpd ymm3, ymm3, ymm7;

             vaddpd ymm0, ymm0, ymm2;
             vaddpd ymm1, ymm1, ymm3;
          add eax, 32;
          jl @@InnerLoop1;

          vextractf128 xmm2, ymm0, 1;
          vextractf128 xmm3, ymm1, 1;

          vhaddpd xmm0, xmm0, xmm2;
          vhaddpd xmm1, xmm1, xmm3;

          test eax, eax;
          jz @@InnerLoopEnd2;

          @@Innerloop2Begin:

          // rest in single elements
          @@InnerLoop2:
             vmovapd xmm2, [ebx];
             add ebx, edi;

             vmovddup xmm3, [edx + eax];

             vmulpd xmm2, xmm2, xmm3;
             vmovhlps xmm4, xmm4, xmm2;

             vaddsd xmm0, xmm0, xmm2;
             vaddsd xmm1, xmm1, xmm4;
          add eax, 8;
          jnz @@InnerLoop2;

          @@InnerLoopEnd2:

          // finall horizontal addition
          vhaddpd xmm0, xmm0, xmm1;

          vmovapd [ecx], xmm0;

          // increment the pointers
          // inc(mt2), inc(dest);
          //add dword ptr [mt2], 8;
          add mt2, 16;
          add ecx, 16;

      // end for x := 0 to width2 div 2 - 1
      sub esi, 2;
      jge @@forxloop;

      @LastXColumn:

      cmp esi, -1;
      jne @NextLine;

      // last column of mt2
      mov eax, iter;
      mov ebx, mt2;

      vxorpd xmm0, xmm0, xmm0;

      @InnerLoop2:
         vmovsd xmm1, [edx + eax];
         vmovsd xmm2, [ebx];

         vmulsd xmm1, xmm1, xmm2;
         vaddsd xmm0, xmm0, xmm1;

         add ebx, edi;
      add eax, 8;
      jnz @InnerLoop2;

      vmovsd [ecx], xmm0;
      add ecx, 8;
      add mt2, 8;

      @NextLine:
      // dec(mt2, Width2);
      // inc(PByte(mt1), LineWidth1);
      // inc(PByte(dest), destOffset);
      //mov ebx, bytesWidth2;
      //sub dword ptr [mt2], ebx;
      mov eax, bytesWidth2;
      sub mt2, eax;
      mov eax, LineWidth1;
      add mt1, eax;
      add ecx, destOffset;

   // end for y := 0 to height1 - 1
   //dec eax;
   dec height1;
   jnz @@foryloop;

   // epilog
   vzeroupper;

   pop esi;
   pop edi;
   pop ebx;
end;
{$IFDEF FPC}
end;
{$ENDIF}

{$ENDIF}

{$ENDIF}

end.
Arnaud Bouchez
2#
Arnaud Bouchez Reply to 2018-02-12 13:49:26Z

Since there is a single function involved here, the easiest is IMHO to convert the FPC AVXMatrixMultOperations.o file directly.

Use the great Object file converter tool.

You may try to convert from one binary format to another, accepted by Delphi.

But I guess that the cleanest way is to convert it to asm:

objconv -fasm AVXMatrixMultOperations.o

It will create a AVXMatrixMultOperations.asm file, which could be used to replace the unknown AVX instructions by simple db ..,..,..,.. bytes. Typically, the generated .asm file has the assembler on the left side, and the raw hexadecimal bytes on the right side.

This is how I dealt with old Delphi compilers in my libraries, for instance:

function crc32csse42(crc: cardinal; buf: PAnsiChar; len: cardinal): cardinal;
asm // eax=crc, edx=buf, ecx=len
        not     eax
        test    ecx, ecx
        jz      @0
        test    edx, edx
        jz      @0
@3:     test    edx, 3
        jz      @8 // align to 4 bytes boundary
        {$ifdef ISDELPHI2010}
        crc32   eax, byte ptr[edx]
        {$else}
        db      $F2, $0F, $38, $F0, $02
        {$endif}
        inc     edx
        ....

So in your case, something like

{$ifdef FPC}
vinsertf128 ymm2, ymm2, xmm4, 1;
vinsertf128 ymm3, ymm3, xmm5, 1;
{$else}
db $xx,$yy,$zz
db $xx,$yy,$zz
{$endif}
You need to login account before you can post.

About| Privacy statement| Terms of Service| Advertising| Contact us| Help| Sitemap|
Processed in 0.313279 second(s) , Gzip On .

© 2016 Powered by mzan.com design MATCHINFO