Changeset View
Changeset View
Standalone View
Standalone View
head/lang/ghc/files/patch-fix-build-on-arm
Property | Old Value | New Value |
---|---|---|
fbsd:nokeywords | null | yes \ No newline at end of property |
svn:eol-style | null | native \ No newline at end of property |
svn:mime-type | null | text/plain \ No newline at end of property |
From d8495549ba9d194815c2d0eaee6797fc7c00756a Mon Sep 17 00:00:00 2001 | |||||
From: Kavon Farvardin <kavon@farvard.in> | |||||
Date: Sun, 28 Oct 2018 12:11:49 -0400 | |||||
Subject: [PATCH] Fix for T14251 on ARM | |||||
We now calculate the SSE register padding needed to fix the calling | |||||
convention in LLVM in a robust way: grouping them by whether | |||||
registers in that class overlap (with the same class overlapping | |||||
itself). | |||||
My prior patch assumed that no matter the platform, physical | |||||
register Fx aliases with Dx, etc, for our calling convention. | |||||
This is unfortunately not the case for any platform except x86-64. | |||||
Test Plan: | |||||
Only know how to test on x86-64, but it should be tested on ARM with: | |||||
`make test WAYS=llvm && make test WAYS=optllvm` | |||||
Reviewers: bgamari, angerman | |||||
Reviewed By: bgamari | |||||
Subscribers: rwbarton, carter | |||||
GHC Trac Issues: #15780, #14251, #15747 | |||||
Differential Revision: https://phabricator.haskell.org/D5254 | |||||
--- | |||||
compiler/llvmGen/LlvmCodeGen/Base.hs | 123 ++++++++++++++++++++++---------- | |||||
compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 6 +- | |||||
2 files changed, 90 insertions(+), 39 deletions(-) | |||||
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs | |||||
index ec91bac..0a40b73 100644 | |||||
--- compiler/llvmGen/LlvmCodeGen/Base.hs | |||||
+++ compiler/llvmGen/LlvmCodeGen/Base.hs | |||||
@@ -26,7 +26,7 @@ module LlvmCodeGen.Base ( | |||||
cmmToLlvmType, widthToLlvmFloat, widthToLlvmInt, llvmFunTy, | |||||
llvmFunSig, llvmFunArgs, llvmStdFunAttrs, llvmFunAlign, llvmInfAlign, | |||||
- llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isSSE, | |||||
+ llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isFPR, | |||||
strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm, | |||||
getGlobalPtr, generateExternDecls, | |||||
@@ -47,6 +47,7 @@ import CodeGen.Platform ( activeStgRegs ) | |||||
import DynFlags | |||||
import FastString | |||||
import Cmm hiding ( succ ) | |||||
+import CmmUtils ( regsOverlap ) | |||||
import Outputable as Outp | |||||
import Platform | |||||
import UniqFM | |||||
@@ -58,8 +59,7 @@ import ErrUtils | |||||
import qualified Stream | |||||
import Control.Monad (ap) | |||||
-import Data.List (sort) | |||||
-import Data.Maybe (mapMaybe) | |||||
+import Data.List (sort, groupBy, head) | |||||
-- ---------------------------------------------------------------------------- | |||||
-- * Some Data Types | |||||
@@ -152,36 +152,91 @@ llvmFunArgs dflags live = | |||||
map (lmGlobalRegArg dflags) (filter isPassed allRegs) | |||||
where platform = targetPlatform dflags | |||||
allRegs = activeStgRegs platform | |||||
- paddedLive = map (\(_,r) -> r) $ padLiveArgs live | |||||
+ paddedLive = map (\(_,r) -> r) $ padLiveArgs dflags live | |||||
isLive r = r `elem` alwaysLive || r `elem` paddedLive | |||||
- isPassed r = not (isSSE r) || isLive r | |||||
- | |||||
- | |||||
-isSSE :: GlobalReg -> Bool | |||||
-isSSE (FloatReg _) = True | |||||
-isSSE (DoubleReg _) = True | |||||
-isSSE (XmmReg _) = True | |||||
-isSSE (YmmReg _) = True | |||||
-isSSE (ZmmReg _) = True | |||||
-isSSE _ = False | |||||
- | |||||
-sseRegNum :: GlobalReg -> Maybe Int | |||||
-sseRegNum (FloatReg i) = Just i | |||||
-sseRegNum (DoubleReg i) = Just i | |||||
-sseRegNum (XmmReg i) = Just i | |||||
-sseRegNum (YmmReg i) = Just i | |||||
-sseRegNum (ZmmReg i) = Just i | |||||
-sseRegNum _ = Nothing | |||||
- | |||||
--- the bool indicates whether the global reg was added as padding. | |||||
--- the returned list is not sorted in any particular order, | |||||
--- but does indicate the set of live registers needed, with SSE padding. | |||||
-padLiveArgs :: LiveGlobalRegs -> [(Bool, GlobalReg)] | |||||
-padLiveArgs live = allRegs | |||||
+ isPassed r = not (isFPR r) || isLive r | |||||
+ | |||||
+ | |||||
+isFPR :: GlobalReg -> Bool | |||||
+isFPR (FloatReg _) = True | |||||
+isFPR (DoubleReg _) = True | |||||
+isFPR (XmmReg _) = True | |||||
+isFPR (YmmReg _) = True | |||||
+isFPR (ZmmReg _) = True | |||||
+isFPR _ = False | |||||
+ | |||||
+sameFPRClass :: GlobalReg -> GlobalReg -> Bool | |||||
+sameFPRClass (FloatReg _) (FloatReg _) = True | |||||
+sameFPRClass (DoubleReg _) (DoubleReg _) = True | |||||
+sameFPRClass (XmmReg _) (XmmReg _) = True | |||||
+sameFPRClass (YmmReg _) (YmmReg _) = True | |||||
+sameFPRClass (ZmmReg _) (ZmmReg _) = True | |||||
+sameFPRClass _ _ = False | |||||
+ | |||||
+normalizeFPRNum :: GlobalReg -> GlobalReg | |||||
+normalizeFPRNum (FloatReg _) = FloatReg 1 | |||||
+normalizeFPRNum (DoubleReg _) = DoubleReg 1 | |||||
+normalizeFPRNum (XmmReg _) = XmmReg 1 | |||||
+normalizeFPRNum (YmmReg _) = YmmReg 1 | |||||
+normalizeFPRNum (ZmmReg _) = ZmmReg 1 | |||||
+normalizeFPRNum _ = error "normalizeFPRNum expected only FPR regs" | |||||
+ | |||||
+getFPRCtor :: GlobalReg -> Int -> GlobalReg | |||||
+getFPRCtor (FloatReg _) = FloatReg | |||||
+getFPRCtor (DoubleReg _) = DoubleReg | |||||
+getFPRCtor (XmmReg _) = XmmReg | |||||
+getFPRCtor (YmmReg _) = YmmReg | |||||
+getFPRCtor (ZmmReg _) = ZmmReg | |||||
+getFPRCtor _ = error "getFPRCtor expected only FPR regs" | |||||
+ | |||||
+fprRegNum :: GlobalReg -> Int | |||||
+fprRegNum (FloatReg i) = i | |||||
+fprRegNum (DoubleReg i) = i | |||||
+fprRegNum (XmmReg i) = i | |||||
+fprRegNum (YmmReg i) = i | |||||
+fprRegNum (ZmmReg i) = i | |||||
+fprRegNum _ = error "fprRegNum expected only FPR regs" | |||||
+ | |||||
+-- | Input: dynflags, and the list of live registers | |||||
+-- | |||||
+-- Output: An augmented list of live registers, where padding was | |||||
+-- added to the list of registers to ensure the calling convention is | |||||
+-- correctly used by LLVM. | |||||
+-- | |||||
+-- Each global reg in the returned list is tagged with a bool, which | |||||
+-- indicates whether the global reg was added as padding, or was an original | |||||
+-- live register. | |||||
+-- | |||||
+-- That is, True => padding, False => a real, live global register. | |||||
+-- | |||||
+-- Also, the returned list is not sorted in any particular order. | |||||
+-- | |||||
+padLiveArgs :: DynFlags -> LiveGlobalRegs -> [(Bool, GlobalReg)] | |||||
+padLiveArgs dflags live = | |||||
+ if platformUnregisterised plat | |||||
+ then taggedLive -- not using GHC's register convention for platform. | |||||
+ else padding ++ taggedLive | |||||
+ where | |||||
+ taggedLive = map (\x -> (False, x)) live | |||||
+ plat = targetPlatform dflags | |||||
+ | |||||
+ fprLive = filter isFPR live | |||||
+ padding = concatMap calcPad $ groupBy sharesClass fprLive | |||||
+ | |||||
+ sharesClass :: GlobalReg -> GlobalReg -> Bool | |||||
+ sharesClass a b = sameFPRClass a b || overlappingClass | |||||
+ where | |||||
+ overlappingClass = regsOverlap dflags (norm a) (norm b) | |||||
+ norm = CmmGlobal . normalizeFPRNum | |||||
+ | |||||
+ calcPad :: [GlobalReg] -> [(Bool, GlobalReg)] | |||||
+ calcPad rs = getFPRPadding (getFPRCtor $ head rs) rs | |||||
+ | |||||
+getFPRPadding :: (Int -> GlobalReg) -> LiveGlobalRegs -> [(Bool, GlobalReg)] | |||||
+getFPRPadding paddingCtor live = padding | |||||
where | |||||
- sseRegNums = sort $ mapMaybe sseRegNum live | |||||
- (_, padding) = foldl assignSlots (1, []) $ sseRegNums | |||||
- allRegs = padding ++ map (\r -> (False, r)) live | |||||
+ fprRegNums = sort $ map fprRegNum live | |||||
+ (_, padding) = foldl assignSlots (1, []) $ fprRegNums | |||||
assignSlots (i, acc) regNum | |||||
| i == regNum = -- don't need padding here | |||||
@@ -195,11 +250,7 @@ padLiveArgs live = allRegs | |||||
genPad start n = | |||||
take n $ flip map (iterate (+1) start) (\i -> | |||||
- (True, FloatReg i)) | |||||
- -- NOTE: Picking float should be fine for the following reasons: | |||||
- -- (1) Float aliases with all the other SSE register types on | |||||
- -- the given platform. | |||||
- -- (2) The argument is not live anyways. | |||||
+ (True, paddingCtor i)) | |||||
-- | Llvm standard fun attributes | |||||
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs | |||||
index 1873400..21abc65 100644 | |||||
--- compiler/llvmGen/LlvmCodeGen/CodeGen.hs | |||||
+++ compiler/llvmGen/LlvmCodeGen/CodeGen.hs | |||||
@@ -1814,14 +1814,14 @@ funPrologue live cmmBlocks = do | |||||
-- STG Liveness optimisation done here. | |||||
funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements) | |||||
funEpilogue live = do | |||||
+ dflags <- getDynFlags | |||||
-- the bool indicates whether the register is padding. | |||||
let alwaysNeeded = map (\r -> (False, r)) alwaysLive | |||||
- livePadded = alwaysNeeded ++ padLiveArgs live | |||||
+ livePadded = alwaysNeeded ++ padLiveArgs dflags live | |||||
-- Set to value or "undef" depending on whether the register is | |||||
-- actually live | |||||
- dflags <- getDynFlags | |||||
let loadExpr r = do | |||||
(v, _, s) <- getCmmRegVal (CmmGlobal r) | |||||
return (Just $ v, s) | |||||
@@ -1833,7 +1833,7 @@ funEpilogue live = do | |||||
loads <- flip mapM allRegs $ \r -> case () of | |||||
_ | (False, r) `elem` livePadded | |||||
-> loadExpr r -- if r is not padding, load it | |||||
- | not (isSSE r) || (True, r) `elem` livePadded | |||||
+ | not (isFPR r) || (True, r) `elem` livePadded | |||||
-> loadUndef r | |||||
| otherwise -> return (Nothing, nilOL) | |||||
-- | |||||
1.9.1 | |||||