STK modifications to angelscript to let Windows/Apple builds

(and 3 forgotten source files)
This commit is contained in:
Bryan Quigley 2021-11-04 18:31:08 -07:00
parent 73c94f571d
commit b710fab5c8
4 changed files with 786 additions and 4 deletions

View File

@ -1,5 +1,10 @@
cmake_minimum_required(VERSION 3.5)
# STK: 32-bit llvm-mingw (Clang) builds crash with the native calling
# conventions, so force AngelScript's generic (portable) convention there.
# The compiler-id comparison is quoted so the value is treated as a string
# even under pre-CMP0054 variable re-dereferencing.
if(MINGW AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND CMAKE_SIZEOF_VOID_P EQUAL 4)
    add_definitions(-DAS_MAX_PORTABILITY)
endif()
project(angelscript)
option(BUILD_SHARED_LIBS "Build shared library" OFF)
@ -104,7 +109,21 @@ set(ANGELSCRIPT_SOURCE
../../source/as_variablescope.cpp
)
if(MSVC AND CMAKE_CL_64)
# STK
set(BUILD_ARCH "unknown")
if (CMAKE_GENERATOR_PLATFORM)
set(BUILD_ARCH "${CMAKE_GENERATOR_PLATFORM}")
endif()
if(MSVC AND CMAKE_CL_64 AND NOT ${CMAKE_GENERATOR_PLATFORM} MATCHES "(ARM64)")
enable_language(ASM_MASM)
if(CMAKE_ASM_MASM_COMPILER_WORKS)
set(ANGELSCRIPT_SOURCE ${ANGELSCRIPT_SOURCE} ../../source/as_callfunc_x64_msvc_asm.asm)
else()
message(FATAL_ERROR "MSVC x86_64 target requires a working assembler")
endif()
endif()
if(MSVC AND CMAKE_CL_64 AND NOT ${BUILD_ARCH} MATCHES "(ARM64)")
enable_language(ASM_MASM)
if(CMAKE_ASM_MASM_COMPILER_WORKS)
set(ANGELSCRIPT_SOURCE ${ANGELSCRIPT_SOURCE} ../../source/as_callfunc_x64_msvc_asm.asm)
@ -117,7 +136,9 @@ if(${ARCHFLAGS} MATCHES "^arm")
enable_language(ASM)
if(CMAKE_ASM_COMPILER_WORKS)
set(ANGELSCRIPT_SOURCE ${ANGELSCRIPT_SOURCE} ../../source/as_callfunc_arm.cpp ../../source/as_callfunc_arm_gcc.S)
set_property(SOURCE ../../source/as_callfunc_arm_gcc.S APPEND PROPERTY COMPILE_FLAGS " -Wa,-mimplicit-it=always")
if (NOT APPLE)
set_property(SOURCE ../../source/as_callfunc_arm_gcc.S APPEND PROPERTY COMPILE_FLAGS " -Wa,-mimplicit-it=always")
endif()
else()
message(FATAL_ERROR "ARM target requires a working assembler")
endif()
@ -127,7 +148,7 @@ if(${ARCHFLAGS} MATCHES "^aarch64")
enable_language(ASM)
if(CMAKE_ASM_COMPILER_WORKS)
set(ANGELSCRIPT_SOURCE ${ANGELSCRIPT_SOURCE} ../../source/as_callfunc_arm64.cpp ../../source/as_callfunc_arm64_gcc.S)
#set_property(SOURCE ../../source/as_callfunc_arm64_gcc.S APPEND PROPERTY COMPILE_FLAGS " -Wa")
set_property(SOURCE ../../source/as_callfunc_arm64_gcc.S APPEND PROPERTY COMPILE_FLAGS " -Wa")
else()
message(FATAL_ERROR "ARM target requires a working assembler")
endif()
@ -160,7 +181,7 @@ if(AS_NO_EXCEPTIONS)
endif()
# Fix x64 issues on Linux
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" AND UNIX AND NOT APPLE)
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT WIN32)
target_compile_options(${ANGELSCRIPT_LIBRARY_NAME} PRIVATE -fPIC)
endif()

View File

@ -0,0 +1,329 @@
/*
AngelCode Scripting Library
Copyright (c) 2020-2021 Andreas Jonsson
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.
Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you
must not claim that you wrote the original software. If you use
this software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
The original version of this library can be located at:
http://www.angelcode.com/angelscript/
Andreas Jonsson
andreas@angelcode.com
*/
//
// as_callfunc_arm64.cpp
//
// These functions handle the actual calling of system functions on the arm64 platform
//
// Written by Max Waine in July 2020, based on as_callfunc_arm.cpp
//
#include "as_config.h"
#ifndef AS_MAX_PORTABILITY
#ifdef AS_ARM64
#include "as_callfunc.h"
#include "as_scriptengine.h"
#include "as_texts.h"
#include "as_tokendef.h"
#include "as_context.h"
// ARM64 targets use has no software floating-point ABI, it's all hardware (or totally disabled)
#define HFA_RET_REGISTERS 4 // s0-s3/d0-d3
#define GP_ARG_REGISTERS 8 // x0-x7
#define FLOAT_ARG_REGISTERS 8 // v0-v7
BEGIN_AS_NAMESPACE
// x0-7: Argument registers (pass params or return results. OK as volatile local variables)
// x8: Indirect result register (e.g. address of large returned struct)
// x9-15: Volatile local variable registers
// x16-17: Intra-procedure-call temporary registers
// x18: Platform register (reserved for use of platform ABIs)
// x19-29: Non-volatile variable registers (must be saved and restored if modified)
// x29: Frame pointer register
// x30: Link register (where to return to)
extern "C" void GetHFAReturnDouble(asQWORD *out1, asQWORD *out2, asQWORD returnSize);
extern "C" void GetHFAReturnFloat(asQWORD *out1, asQWORD *out2, asQWORD returnSize);
extern "C" asQWORD CallARM64RetInMemory(
const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
const asQWORD *stackArgs, asQWORD numStackArgs,
void *retPointer, asFUNCTION_t func
);
extern "C" double CallARM64Double(
const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
const asQWORD *stackArgs, asQWORD numStackArgs,
asFUNCTION_t func
);
extern "C" float CallARM64Float(
const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
const asQWORD *stackArgs, asQWORD numStackArgs,
asFUNCTION_t func
);
extern "C" asQWORD CallARM64(
const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
const asQWORD *stackArgs, asQWORD numStackArgs,
asFUNCTION_t func
);
extern "C" asQWORD CallARM64Ret128(
const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
const asQWORD *stackArgs, asQWORD numStackArgs,
asQWORD *higherQWORD, asFUNCTION_t func
);
//
// If it's possible to fit in registers,
// there may not be enough float register space even if true is returned
//
static inline bool IsRegisterHFA(const asCDataType &type)
{
const asCTypeInfo *const typeInfo = type.GetTypeInfo();
if( typeInfo == nullptr ||
(typeInfo->flags & asOBJ_APP_CLASS_ALLFLOATS) == 0 ||
type.IsObjectHandle() || type.IsReference() )
return false;
const bool doubles = (typeInfo->flags & asOBJ_APP_CLASS_ALIGN8) != 0;
const int maxAllowedSize = doubles ? sizeof(double) * HFA_RET_REGISTERS : sizeof(float) * HFA_RET_REGISTERS;
return type.GetSizeInMemoryBytes() <= maxAllowedSize;
}
//
// Checks whether an HFA-shaped parameter can be passed entirely in the
// remaining float argument registers (v0-v7). Unlike IsRegisterHFA, a true
// result here also guarantees there is enough register space left.
//
static inline bool IsRegisterHFAParameter(const asCDataType &type, const asQWORD numFloatRegArgs)
{
	if( !IsRegisterHFA(type) )
		return false;

	const bool doubles = (type.GetTypeInfo()->flags & asOBJ_APP_CLASS_ALIGN8) != 0;
	// NOTE(review): GetSizeInMemoryDWords() counts 4-byte words, yet it is
	// divided by a byte size (8 for doubles, 4 for floats), which appears to
	// undercount registers (e.g. 4 floats = 4 dwords / 4 = 1). This matches
	// upstream AngelScript, so confirm intent upstream before changing.
	const int registersUsed = type.GetSizeInMemoryDWords() / (doubles ? sizeof(double) : sizeof(float));

	return numFloatRegArgs + registersUsed <= FLOAT_ARG_REGISTERS;
}
// Marshals the script-side arguments into the ARM64 argument registers and
// stack area, then performs the native call through the CallARM64* assembler
// routines and collects the result.
//   context      - active context; supplies the engine (used for CallFree)
//   descr        - description of the system function being invoked
//   obj          - object pointer for thiscall/objfirst/objlast conventions
//   args         - packed argument data from the script stack
//   retPointer   - destination buffer for return-in-memory results
//   retQW2       - receives the upper QWORD of two-QWORD returns
//   secondObject - secondary object for the *_OBJFIRST/*_OBJLAST variants
// Returns the first QWORD of the native function's return value.
asQWORD CallSystemFunctionNative(asCContext *context, asCScriptFunction *descr, void *obj, asDWORD *args, void *retPointer, asQWORD &retQW2, void *secondObject)
{
	asCScriptEngine *engine = context->m_engine;
	const asSSystemFunctionInterface *const sysFunc = descr->sysFuncIntf;
	const asCDataType &retType = descr->returnType;
	const asCTypeInfo *const retTypeInfo = retType.GetTypeInfo();
	asFUNCTION_t func = sysFunc->func;
	int callConv = sysFunc->callConv;
	asQWORD retQW = 0;

	// Staging areas mirroring the ARM64 argument register file
	asQWORD gpRegArgs[GP_ARG_REGISTERS];       // x0-x7
	asQWORD floatRegArgs[FLOAT_ARG_REGISTERS]; // v0-v7
	asQWORD stackArgs[64]; // It's how many x64 users can have
	asQWORD numGPRegArgs = 0;
	asQWORD numFloatRegArgs = 0;
	asQWORD numStackArgs = 0;

	asFUNCTION_t *vftable;

	// Optimization to avoid check 12 values (all ICC_ that contains THISCALL)
	if( (callConv >= ICC_THISCALL && callConv <= ICC_VIRTUAL_THISCALL_RETURNINMEM) ||
		(callConv >= ICC_THISCALL_OBJLAST && callConv <= ICC_VIRTUAL_THISCALL_OBJFIRST_RETURNINMEM) )
	{
		// Add the object pointer as the first parameter
		gpRegArgs[numGPRegArgs++] = (asQWORD)obj;
	}

	if( callConv == ICC_CDECL_OBJFIRST || callConv == ICC_CDECL_OBJFIRST_RETURNINMEM )
	{
		// Add the object pointer as the first parameter
		gpRegArgs[numGPRegArgs++] = (asQWORD)obj;
	}
	else if( callConv == ICC_THISCALL_OBJFIRST || callConv == ICC_THISCALL_OBJFIRST_RETURNINMEM ||
		callConv == ICC_VIRTUAL_THISCALL_OBJFIRST || callConv == ICC_VIRTUAL_THISCALL_OBJFIRST_RETURNINMEM )
	{
		// Add the object pointer as the first parameter
		gpRegArgs[numGPRegArgs++] = (asQWORD)secondObject;
	}

	if( callConv == ICC_VIRTUAL_THISCALL || callConv == ICC_VIRTUAL_THISCALL_RETURNINMEM || callConv == ICC_VIRTUAL_THISCALL_OBJFIRST ||
		callConv == ICC_VIRTUAL_THISCALL_OBJFIRST_RETURNINMEM || callConv == ICC_VIRTUAL_THISCALL_OBJLAST || callConv == ICC_VIRTUAL_THISCALL_OBJLAST_RETURNINMEM )
	{
		// Get virtual function table from the object pointer
		vftable = *(asFUNCTION_t**)obj;
		func = vftable[FuncPtrToUInt(func)/sizeof(void*)];
	}

	// Route each declared parameter to GP registers, float registers or stack
	asUINT argsPos = 0;
	for( asUINT n = 0; n < descr->parameterTypes.GetLength(); n++ )
	{
		const asCDataType &parmType = descr->parameterTypes[n];
		const asCTypeInfo *const parmTypeInfo = parmType.GetTypeInfo();

		if( parmType.IsObject() && !parmType.IsObjectHandle() && !parmType.IsReference() )
		{
			// Object passed by value: size in 4-byte and (rounded-up) 8-byte units
			const asUINT parmDWords = parmType.GetSizeInMemoryDWords();
			const asUINT parmQWords = (parmDWords >> 1) + (parmDWords & 1);

			// NOTE(review): 'passedAsPointer' is true for small (<= 2 QWORD)
			// objects here and reserves only one GP slot for them -- verify the
			// naming/intent against the ABI rule before changing anything.
			const bool passedAsPointer = parmQWords <= 2;
			const bool fitsInRegisters = passedAsPointer ? (numGPRegArgs < GP_ARG_REGISTERS) : (numGPRegArgs + parmQWords <= GP_ARG_REGISTERS);
			asQWORD *const argsArray = fitsInRegisters ? gpRegArgs : stackArgs;
			asQWORD &numArgs = fitsInRegisters ? numGPRegArgs : numStackArgs;

			if( (parmTypeInfo->flags & COMPLEX_MASK) )
			{
				// Complex objects are passed by pointer
				argsArray[numArgs++] = *(asQWORD*)&args[argsPos];
				argsPos += AS_PTR_SIZE;
			}
			else if( IsRegisterHFAParameter(parmType, numFloatRegArgs) )
			{
				// All-float aggregate: spread the members across float registers
				if( (parmTypeInfo->flags & asOBJ_APP_CLASS_ALIGN8) != 0 )
				{
					const asQWORD *const contents = *(asQWORD**)&args[argsPos];
					for( asUINT i = 0; i < parmQWords; i++ )
						floatRegArgs[numFloatRegArgs++] = *(asQWORD*)&contents[i];
				}
				else
				{
					const asDWORD *const contents = *(asDWORD**)&args[argsPos];
					for( asUINT i = 0; i < parmDWords; i++ )
						floatRegArgs[numFloatRegArgs++] = *(asQWORD*)&contents[i];
				}
				// The object was heap-allocated by the VM; release it now that
				// its contents are copied out
				engine->CallFree(*(char**)(args+argsPos));
				argsPos += AS_PTR_SIZE;
			}
			else
			{
				// Copy the object's memory to the buffer
				memcpy(&argsArray[numArgs], *(void**)(args+argsPos), parmType.GetSizeInMemoryBytes());

				// Delete the original memory
				engine->CallFree(*(char**)(args+argsPos));
				argsPos += AS_PTR_SIZE;
				numArgs += parmQWords;
			}
		}
		else if( parmType.IsFloatType() && !parmType.IsReference() )
		{
			// Single-precision scalar: float register if one is free, else stack
			if( numFloatRegArgs >= FLOAT_ARG_REGISTERS )
				stackArgs[numStackArgs++] = args[argsPos];
			else
				floatRegArgs[numFloatRegArgs++] = args[argsPos];
			argsPos++;
		}
		else if( parmType.IsDoubleType() && !parmType.IsReference() )
		{
			// Double-precision scalar occupies two script-stack dwords
			if( numFloatRegArgs >= FLOAT_ARG_REGISTERS )
				stackArgs[numStackArgs++] = *(asQWORD*)&args[argsPos];
			else
				floatRegArgs[numFloatRegArgs++] = *(asQWORD*)&args[argsPos];
			argsPos += 2;
		}
		else
		{
			// Copy the value directly
			const asUINT parmDWords = parmType.GetSizeOnStackDWords();
			const asUINT parmQWords = (parmDWords >> 1) + (parmDWords & 1);

			const bool fitsInRegisters = numGPRegArgs + parmQWords <= GP_ARG_REGISTERS;
			asQWORD *const argsArray = fitsInRegisters ? gpRegArgs : stackArgs;
			asQWORD &numArgs = fitsInRegisters ? numGPRegArgs : numStackArgs;

			memcpy(&argsArray[numArgs], (void*)(args+argsPos), parmDWords * 4);
			argsPos += parmDWords;
			numArgs += parmQWords;
		}
	}

	if( callConv == ICC_CDECL_OBJLAST || callConv == ICC_CDECL_OBJLAST_RETURNINMEM )
	{
		// Add the object pointer as the last parameter
		if( numGPRegArgs < GP_ARG_REGISTERS )
			gpRegArgs[numGPRegArgs++] = (asQWORD)obj;
		else
			stackArgs[numStackArgs++] = (asQWORD)obj;
	}
	else if( callConv == ICC_THISCALL_OBJLAST || callConv == ICC_THISCALL_OBJLAST_RETURNINMEM ||
		callConv == ICC_VIRTUAL_THISCALL_OBJLAST || callConv == ICC_VIRTUAL_THISCALL_OBJLAST_RETURNINMEM )
	{
		// Add the object pointer as the last parameter
		if( numGPRegArgs < GP_ARG_REGISTERS )
			gpRegArgs[numGPRegArgs++] = (asQWORD)secondObject;
		else
			stackArgs[numStackArgs++] = (asQWORD)secondObject;
	}

	if( IsRegisterHFA(retType) && !(retTypeInfo->flags & COMPLEX_MASK) )
	{
		// This is to deal with HFAs (Homogeneous Floating-point Aggregates):
		// ARM64 will place all-float composite types (of equal precision)
		// with <= 4 members in the float return registers
		const int structSize = retType.GetSizeInMemoryBytes();

		CallARM64(gpRegArgs, numGPRegArgs, floatRegArgs, numFloatRegArgs, stackArgs, numStackArgs, func);
		if( (retTypeInfo->flags & asOBJ_APP_CLASS_ALIGN8) != 0 )
		{
			// Up to two doubles fit in retQW/retQW2; larger HFAs go to retPointer
			if( structSize <= sizeof(double) * 2 )
				GetHFAReturnDouble(&retQW, &retQW2, structSize);
			else
				GetHFAReturnDouble((asQWORD*)retPointer, ((asQWORD*)retPointer) + 1, structSize);
		}
		else
			GetHFAReturnFloat(&retQW, &retQW2, structSize);
	}
	else if( sysFunc->hostReturnFloat )
	{
		// Scalar float/double results come back in s0/d0
		if( sysFunc->hostReturnSize == 1 )
			*(float*)&retQW = CallARM64Float(gpRegArgs, numGPRegArgs, floatRegArgs, numFloatRegArgs, stackArgs, numStackArgs, func);
		else
			*(double*)&retQW = CallARM64Double(gpRegArgs, numGPRegArgs, floatRegArgs, numFloatRegArgs, stackArgs, numStackArgs, func);
	}
	else if( sysFunc->hostReturnInMemory )
		retQW = CallARM64RetInMemory(gpRegArgs, numGPRegArgs, floatRegArgs, numFloatRegArgs, stackArgs, numStackArgs, retPointer, func);
	else
	{
		// Integer/pointer results; 128-bit values also need the second QWORD
		if( retType.GetSizeInMemoryBytes() > sizeof(asQWORD) )
			retQW = CallARM64Ret128(gpRegArgs, numGPRegArgs, floatRegArgs, numFloatRegArgs, stackArgs, numStackArgs, &retQW2, func);
		else
			retQW = CallARM64(gpRegArgs, numGPRegArgs, floatRegArgs, numFloatRegArgs, stackArgs, numStackArgs, func);
	}

	return retQW;
}
END_AS_NAMESPACE
#endif // AS_ARM64
#endif // AS_MAX_PORTABILITY

View File

@ -0,0 +1,227 @@
//
// AngelCode Scripting Library
// Copyright (c) 2020-2021 Andreas Jonsson
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any
// damages arising from the use of this software.
//
// Permission is granted to anyone to use this software for any
// purpose, including commercial applications, and to alter it and
// redistribute it freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented// you
// must not claim that you wrote the original software. If you use
// this software in a product, an acknowledgment in the product
// documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such, and
// must not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source
// distribution.
//
// The original version of this library can be located at:
// http://www.angelcode.com/angelscript/
//
// Andreas Jonsson
// andreas@angelcode.com
//
// Assembly routines for the ARM64/AArch64 call convention used for Linux
// Written by Max Waine in July 2020, based on as_callfunc_arm_msvc.asm,
// with assistance & guidance provided by Sir Kane
// Compile with GCC/GAS
#if !defined(AS_MAX_PORTABILITY)
#if defined(__aarch64__)
.arch armv8-a
.text
.global GetHFAReturnDouble
.global GetHFAReturnFloat
.global CallARM64Ret128
.global CallARM64RetInMemory
.global CallARM64Double
.global CallARM64Float
.global CallARM64
.type GetHFAReturnDouble, %function
.type GetHFAReturnFloat, %function
.type CallARM64Ret128, %function
.type CallARM64RetInMemory, %function
.type CallARM64Double, %function
.type CallARM64Float, %function
.type CallARM64, %function
.align 2
// GetHFAReturnDouble(asQWORD *out1 (x0), asQWORD *out2 (x1), asQWORD returnSize (x2)):
// stores the d0-d3 HFA return registers. Jumps into the middle of the store
// sequence so that only returnSize/8 stores execute (each str is one 4-byte
// instruction, hence the byte size is shifted right by 1 to get the offset).
// STK fix: the offset must be computed from returnSize (x2) as the comment
// says, not from the out2 pointer (x1), which would produce a wild branch.
GetHFAReturnDouble:
	adr x9, populateDoubles
	sub x9, x9, x2, lsr 1 // x9 -= returnSize >> 1; (/2 because double is 2x instruction size)
	br x9
	str d3, [x0, #0x18]
	str d2, [x0, #0x10]
	str d1, [x1]
	str d0, [x0]
populateDoubles:
	ret
.align 2
// GetHFAReturnFloat(asQWORD *out1 (x0), asQWORD *out2 (x1), asQWORD returnSize (x2)):
// stores the s0-s3 HFA return registers into the two output QWORDs. Jumps
// into the store sequence so only returnSize/4 stores execute (each str is
// one 4-byte instruction and each float member is 4 bytes, so the byte size
// is used as the offset directly).
GetHFAReturnFloat:
	adr x9, populateFloats
	sub x9, x9, x2 // x9 -= returnSize; (already 4 bytes per return)
	br x9
	str s3, [x1, #0x4]
	str s2, [x1]
	str s1, [x0, #0x4]
	str s0, [x0]
populateFloats:
	ret
//[returnType] CallARM64[type](
// const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
// const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
// const asQWORD *stackArgs, asQWORD numStackArgs,
// asFUNCTION_t func
//)
.align 2
// CallARM64 / CallARM64Float / CallARM64Double share one body: copy the
// packed stack arguments below sp, load the requested number of float
// (d0-d7) and integer (x0-x7) argument registers from the supplied arrays,
// then branch-and-link to the target function. The float/double entry
// points need no extra code because the result is already in s0/d0 (or x0)
// where the differing C prototypes expect it.
// Arguments: x0=gpRegArgs, x1=numGPRegArgs, x2=floatRegArgs,
//            x3=numFloatRegArgs, x4=stackArgs, x5=numStackArgs, x6=func
CallARM64Double:
CallARM64Float:
CallARM64:
	.cfi_startproc
	stp fp, lr, [sp,#-0x20]!
	str x20, [sp,#0x10]
	.cfi_def_cfa_offset 0x20
	.cfi_offset 20, 0x10
	.cfi_offset fp, -0x20
	.cfi_offset lr, -0x18
	mov fp, sp
	mov x20, #0 // x20 = 16-byte-aligned byte size of the stack-argument area
	cbz x5, stackArgsLoopEnd
	// Align count to 2, then multiply by 8, resulting in a size aligned to 16
	add x20, x5, #1
	lsl x20, x20, #3
	and x20, x20, #-0x10
	// Multiply count by 8
	lsl x10, x5, #3
	sub sp, sp, x20
stackArgsLoopStart:
	// Copy two QWORDs per iteration; sp returns to its pre-loop value because
	// the post-increments undo the sub above
	ldp x9,x11, [x4],#16
	stp x9,x11, [sp],#16
	subs x10, x10, #16
	bgt stackArgsLoopStart
stackArgsLoopEnd:
	// Calculate amount to jump forward, avoiding pointless instructions
	adr x9, populateFloatRegisterArgsEnd
	sub x9, x9, x3, lsl 2 // x9 -= numFloatRegArgs * 4
	br x9
	ldr d7, [x2, #0x38]
	ldr d6, [x2, #0x30]
	ldr d5, [x2, #0x28]
	ldr d4, [x2, #0x20]
	ldr d3, [x2, #0x18]
	ldr d2, [x2, #0x10]
	ldr d1, [x2, #0x08]
	ldr d0, [x2]
populateFloatRegisterArgsEnd:
	// Stash the target before x6 is clobbered by the GP-argument loads below
	mov x15, x6
	// Calculate amount to jump forward, avoiding pointless instructions
	adr x9, populateGPRegisterArgsEnd
	sub x9, x9, x1, lsl 2 // x9 -= numGPRegArgs * 4
	br x9
	ldr x7, [x0, #0x38]
	ldr x6, [x0, #0x30]
	ldr x5, [x0, #0x28]
	ldr x4, [x0, #0x20]
	ldr x3, [x0, #0x18]
	ldr x2, [x0, #0x10]
	ldr x1, [x0, #0x08]
	ldr x0, [x0]
populateGPRegisterArgsEnd:
	// Actually call function
	sub sp, sp, x20
	blr x15
	add sp, sp, x20
	ldr x20, [sp,#0x10]
	ldp fp, lr, [sp],#0x20
	.cfi_restore lr
	.cfi_restore fp
	.cfi_restore 20
	.cfi_def_cfa_offset 0
	ret
	.cfi_endproc
.align 2
// CallARM64Ret128: same calling sequence as CallARM64, but for results
// returned in x0:x1. x6 carries the pointer that receives the upper QWORD
// (higherQWORD) and x7 the target function; the function pointer is shifted
// into x6 so CallARM64 sees its usual 7-argument layout.
CallARM64Ret128:
	.cfi_startproc
	stp fp, lr, [sp,#-0x20]!
	str x20, [sp,#0x10]
	.cfi_def_cfa_offset 0x20
	.cfi_offset 20, 0x10
	.cfi_offset fp, -0x20
	.cfi_offset lr, -0x18
	mov fp, sp
	mov x20, x6 // preserve higherQWORD pointer across the call (x20 is callee-saved)
	mov x6, x7
	mov x7, #0
	bl CallARM64
	str x1, [x20] // store the upper QWORD of the 128-bit result
	ldr x20, [sp,#0x10]
	ldp fp, lr, [sp],#0x20
	.cfi_restore lr
	.cfi_restore fp
	.cfi_restore 20
	.cfi_def_cfa_offset 0
	ret
	.cfi_endproc
.align 2
// CallARM64RetInMemory: same calling sequence as CallARM64, but for
// functions that return their result in memory. x6 carries retPointer and
// x7 the target; retPointer is moved into x8 (the AArch64 indirect-result
// register) and the function pointer into x6 for CallARM64.
CallARM64RetInMemory:
	.cfi_startproc
	stp fp, lr, [sp,#-0x10]!
	mov fp, sp
	.cfi_def_cfa_offset 0x10
	.cfi_offset fp, -0x10
	.cfi_offset lr, -0x08
	mov x8, x6 // x8 = indirect result location register
	mov x6, x7
	mov x7, #0
	bl CallARM64
	mov x0, x8 // return retPointer, mirroring typical C++ sret semantics
	ldp fp, lr, [sp],#0x10
	.cfi_restore lr
	.cfi_restore fp
	.cfi_def_cfa_offset 0
	ret
	.cfi_endproc
#endif /* __aarch64__ */
#endif /* !AS_MAX_PORTABILITY */

View File

@ -0,0 +1,205 @@
;
; AngelCode Scripting Library
; Copyright (c) 2020-2020 Andreas Jonsson
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any
; damages arising from the use of this software.
;
; Permission is granted to anyone to use this software for any
; purpose, including commercial applications, and to alter it and
; redistribute it freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you
; must not claim that you wrote the original software. If you use
; this software in a product, an acknowledgment in the product
; documentation would be appreciated but is not required.
;
; 2. Altered source versions must be plainly marked as such, and
; must not be misrepresented as being the original software.
;
; 3. This notice may not be removed or altered from any source
; distribution.
;
; The original version of this library can be located at:
; http://www.angelcode.com/angelscript/
;
; Andreas Jonsson
; andreas@angelcode.com
;
; Assembly routines for the ARM64/AArch64 call convention used for Windows 10 on ARM
; Written by Max Waine in July 2020, based on as_callfunc_arm_msvc.asm
; MSVC currently doesn't support inline assembly for the ARM64 platform,
; and if they're treating it like x64 /won't/ ever support inline assembly,
; so this separate file is needed.
; Compile with Microsoft ARM64 assembler (armasm64)
; http://msdn.microsoft.com/en-us/library/hh873190.aspx
AREA |.rdata|, DATA, READONLY
EXPORT GetHFAReturnDouble
EXPORT GetHFAReturnFloat
EXPORT CallARM64Ret128
EXPORT CallARM64RetInMemory
EXPORT CallARM64Double
EXPORT CallARM64Float
EXPORT CallARM64
AREA |.text|, CODE, ALIGN=2
ALIGN 4
GetHFAReturnDouble PROC
adr x9, |populateDoubles|
sub x9, x9, x1, lsr 1 ; x9 -= returnSize >> 1; (/2 because double is 2x instruction size)
br x9
str d3, [x0, #0x18]
str d2, [x0, #0x10]
str d1, [x1]
str d0, [x0]
|populateDoubles|
ret
ENDP ; GetHFAReturnDouble
ALIGN 4
GetHFAReturnFloat PROC
adr x9, |populateFloats|
sub x9, x9, x2 // x9 -= returnSize; (already 4 bytes per return)
br x9
str s3, [x1, #0x4]
str s2, [x1]
str s1, [x0, #0x4]
str s0, [x0]
|populateFloats|
ret
ENDP ; GetHFAReturnFloat
;[returnType] CallARM64[type](
; const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
; const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
; const asQWORD *stackArgs, asQWORD numStackArgs,
; asFUNCTION_t func
;)
ALIGN 4
CallARM64Double PROC
stp fp, lr, [sp,#-0x10]!
bl CallARM64
ldp fp, lr, [sp,#-0x10]!
ret
ENDP ; CallARM64Double
ALIGN 4
CallARM64Float PROC
stp fp, lr, [sp,#-0x10]!
bl CallARM64
ldp fp, lr, [sp,#-0x10]!
ret
ENDP ; CallARM64Float
	ALIGN 4
; CallARM64: copies the packed stack arguments below sp, loads the requested
; number of float (d0-d7) and integer (x0-x7) argument registers from the
; supplied arrays, then branch-and-links to the target function.
; Arguments: x0=gpRegArgs, x1=numGPRegArgs, x2=floatRegArgs,
;            x3=numFloatRegArgs, x4=stackArgs, x5=numStackArgs, x6=func
; NOTE(review): unlike the GAS twin this version never does 'mov fp, sp'
; after the prologue; fp is saved/restored regardless, so this looks benign,
; but confirm against upstream as_callfunc_arm64_msvc_asm.asm.
CallARM64 PROC
	stp fp, lr, [sp,#-0x20]!
	str x20, [sp,#0x10]
	mov x20, #0; x20 = 16-byte-aligned byte size of the stack-argument area
	cbz x5, |stackArgsLoopEnd|
	; Align count to 2, then multiply by 8, resulting in a size aligned to 16
	add x20, x5, #1
	lsl x20, x20, #3
	and x20, x20, #-0x10
	; Multiply count by 8
	lsl x10, x5, #3
	sub sp, sp, x20
|stackArgsLoopStart|
	; Copy two QWORDs per iteration; the post-increments undo the sub above
	ldp x9,x11, [x4],#16
	stp x9,x11, [sp],#16
	subs x10, x10, #16
	bgt |stackArgsLoopStart|
|stackArgsLoopEnd|
	; Calculate amount to jump forward, avoiding pointless instructions
	adr x9, |populateFloatRegisterArgsEnd|
	sub x9, x9, x3, lsl 2 ; x9 -= numFloatRegArgs * 4
	br x9
	ldr d7, [x2, #0x38]
	ldr d6, [x2, #0x30]
	ldr d5, [x2, #0x28]
	ldr d4, [x2, #0x20]
	ldr d3, [x2, #0x18]
	ldr d2, [x2, #0x10]
	ldr d1, [x2, #0x08]
	ldr d0, [x2]
|populateFloatRegisterArgsEnd|
	; Stash the target before x6 is clobbered by the GP-argument loads below
	mov x15, x6
	; Calculate amount to jump forward, avoiding pointless instructions
	adr x9, |populateGPRegisterArgsEnd|
	sub x9, x9, x1, lsl 2 ; x9 -= numGPRegArgs * 4
	br x9
	ldr x7, [x0, #0x38]
	ldr x6, [x0, #0x30]
	ldr x5, [x0, #0x28]
	ldr x4, [x0, #0x20]
	ldr x3, [x0, #0x18]
	ldr x2, [x0, #0x10]
	ldr x1, [x0, #0x08]
	ldr x0, [x0]
|populateGPRegisterArgsEnd|
	; Actually call function
	sub sp, sp, x20
	blr x15
	add sp, sp, x20
	ldr x20, [sp,#0x10]
	ldp fp, lr, [sp],#0x20
	ret
	ENDP ; CallARM64
ALIGN 4
CallARM64Ret128 PROC
stp fp, lr, [sp,#-0x20]!
str x20, [sp,#0x10]
mov fp, sp
mov x20, x6
mov x6, x7
mov x7, #0
bl CallARM64
str x1, [x20]
ldr x20, [sp,#0x10]
ldp fp, lr, [sp],#0x20
ret ; CallARM64Ret128
ALIGN 4
CallARM64RetInMemory PROC
stp fp, lr, [sp,#-0x10]!
mov fp, sp
mov x8, x6
mov x6, x7
mov x7, #0
bl CallARM64
mov x0, x8
ldp fp, lr, [sp],#0x10
ret ; CallARM64RetInMemory
END