@@ -709,16 +709,44 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, uint64_t Size
709709 // Fairly excessive buffer range to make sure we don't overflow
710710 uint32_t BufferRange = SSACount * 16 ;
711711
712- LOGMAN_THROW_A_FMT (CurrentCodeBuffer->LookupCache .get () == ThreadState->LookupCache ->Shared , " INVARIANT VIOLATED: SharedLookupCache "
713- " doesn't match up!\n " );
714- if (auto Prev = CheckCodeBufferUpdate ()) {
715- ThreadState->LookupCache ->ChangeGuestToHostMapping (*Prev, *CurrentCodeBuffer->LookupCache );
716- }
712+ auto RefreshCodeBuffer = [this , BufferRange](bool Align) { // Local helper: points the emitter at CurrentCodeBuffer and places the cursor at CodeBuffers.LatestOffset (rounded up to 16 when Align is true).
713+ LOGMAN_THROW_A_FMT (CurrentCodeBuffer->LookupCache .get () == ThreadState->LookupCache ->Shared , " INVARIANT VIOLATED: SharedLookupCache "
714+ " doesn't match up!\n " ); // Asserts that the current buffer's LookupCache is the same object as the thread LookupCache's Shared pointer.
715+ if (auto Prev = CheckCodeBufferUpdate ()) { // A non-empty Prev is handed to the remap below. NOTE(review): presumably this means the active CodeBuffer was replaced - confirm against CheckCodeBufferUpdate.
716+ ThreadState->LookupCache ->ChangeGuestToHostMapping (*Prev, *CurrentCodeBuffer->LookupCache ); // Switch the thread's guest-to-host mapping from Prev to the current buffer's LookupCache.
717+ }
718+
719+ SetBuffer (CurrentCodeBuffer->Ptr , CurrentCodeBuffer->Size ); // Emit into the current code buffer from here on.
720+ SetCursorOffset (Align ? AlignUp (CodeBuffers.LatestOffset , 16 ) : CodeBuffers.LatestOffset ); // Resume at the last recorded offset, 16-aligned on request.
721+ if ((GetCursorOffset () + BufferRange) > (CurrentCodeBuffer->Size - Utils::FEX_PAGE_SIZE)) { // Cursor + BufferRange would run into the last page of the buffer.
722+ CTX->ClearCodeCache (ThreadState); // NOTE(review): the cursor/LatestOffset state after this call is not visible in this chunk - confirm it is re-established before code is emitted.
723+ }
724+
725+ if (Align) {
726+ Align16B (); // NOTE(review): the offset was already AlignUp'd above; presumably this re-aligns in case ClearCodeCache moved the cursor - confirm.
727+ }
728+
729+ CodeBuffers.LatestOffset = GetCursorOffset (); // Record the (possibly aligned) cursor position back into CodeBuffers.LatestOffset.
730+ };
717731
718- SetBuffer (CurrentCodeBuffer->Ptr , CurrentCodeBuffer->Size );
719- SetCursorOffset (CodeBuffers.LatestOffset );
720- if ((GetCursorOffset () + BufferRange) > (CurrentCodeBuffer->Size - Utils::FEX_PAGE_SIZE)) {
721- CTX->ClearCodeCache (ThreadState);
732+ static thread_local std::unique_ptr<CodeBuffer> TempCodeBuffer;
733+ {
734+ // FEXCORE_PROFILE_SCOPED("AcquireLock1");
735+ // CodeData.CodeBufferLock = std::unique_lock { CodeBuffers.CodeBufferWriteMutex, std::try_to_lock };
736+ CodeData.CodeBufferLock = {};
737+ }
738+ if (CodeData.CodeBufferLock ) {
739+ RefreshCodeBuffer (false );
740+ } else {
741+ // Another thread is holding the mutex for compiling, so this thread will compile to a
742+ // temporary buffer instead. We'll still need to wait for the mutex later (to relocate to
743+ // the main CodeBuffer), but we can do useful work in the meantime.
744+ auto DesiredSize = AlignUp (BufferRange, Utils::FEX_PAGE_SIZE) + Utils::FEX_PAGE_SIZE /* Guard area */ ;
745+ if (!TempCodeBuffer || TempCodeBuffer->Size < DesiredSize) {
746+ // TODO: Don't use CodeBuffer, since that will also allocate a LookupCache...
747+ TempCodeBuffer = std::make_unique<CodeBuffer>(DesiredSize);
748+ }
749+ SetBuffer (TempCodeBuffer->Ptr , TempCodeBuffer->Size );
722750 }
723751
724752 CodeData.BlockBegin = GetCursorAddress<uint8_t *>();
@@ -892,6 +920,32 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, uint64_t Size
892920
893921 JITBlockTail->Size = CodeData.Size ;
894922
923+ if (!CodeData.CodeBufferLock ) {
924+ // We failed locking this mutex before, so we compiled to TempCodeBuffer instead.
925+ // Migrate the compile output to the actual CodeBuffer.
926+ {
927+ FEXCORE_PROFILE_SCOPED (" AcquireLock2" );
928+ CodeData.CodeBufferLock = std::unique_lock { CodeBuffers.CodeBufferWriteMutex };
929+ }
930+
931+ const auto TempSize = GetCursorOffset ();
932+
933+ // NOTE: 16-byte alignment for block linking records must be preserved here
934+ RefreshCodeBuffer (true );
935+
936+ // Adjust host addresses
937+ const auto Delta = GetCursorAddress<uint8_t *>() - CodeData.BlockBegin ;
938+ CodeBegin += Delta;
939+ CodeData.BlockBegin += Delta;
940+ for (auto & EntryPoint : CodeData.EntryPoints ) {
941+ EntryPoint.second += Delta;
942+ }
943+
944+ // Copy over CodeBuffer contents
945+ memcpy (GetCursorAddress<uint8_t *>(), TempCodeBuffer->Ptr , TempSize);
946+ SetCursorOffset (CodeBuffers.LatestOffset + TempSize);
947+ }
948+
895949 CodeBuffers.LatestOffset = GetCursorOffset ();
896950
897951 ClearICache (CodeData.BlockBegin , CodeOnlySize);
0 commit comments