diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 5991cf23d5dc..cf6549e2a5bd 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1,5024 +1,5005 @@
//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//

#include "TokenAnnotator.h"
#include "FormatToken.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "format-token-annotator"

namespace clang {
namespace format {

namespace {

/// Returns \c true if the line starts with a token that can start a statement
/// with an initializer.
static bool startsWithInitStatement(const AnnotatedLine &Line) {
  return Line.startsWith(tok::kw_for) || Line.startsWith(tok::kw_if) ||
         Line.startsWith(tok::kw_switch);
}

/// Returns \c true if the token can be used as an identifier in
/// an Objective-C \c \@selector, \c false otherwise.
///
/// Because getFormattingLangOpts() always lexes source code as
/// Objective-C++, C++ keywords like \c new and \c delete are
/// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
///
/// For Objective-C and Objective-C++, both identifiers and keywords
/// are valid inside @selector(...) (or a macro which
/// invokes @selector(...)). So, we treat any identifier or
/// keyword as a potential Objective-C selector component.
static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
  return Tok.Tok.getIdentifierInfo() != nullptr;
}

/// With `Left` being '(', check if we're at either `[...](` or
/// `[...]<...>(`, where the [ opens a lambda capture list.
static bool isLambdaParameterList(const FormatToken *Left) {
  // Skip <...> if present.
  if (Left->Previous && Left->Previous->is(tok::greater) &&
      Left->Previous->MatchingParen &&
      Left->Previous->MatchingParen->is(TT_TemplateOpener)) {
    Left = Left->Previous->MatchingParen;
  }

  // Check for `[...]`.
  return Left->Previous && Left->Previous->is(tok::r_square) &&
         Left->Previous->MatchingParen &&
         Left->Previous->MatchingParen->is(TT_LambdaLSquare);
}

/// Returns \c true if the token is followed by a boolean condition, \c false
/// otherwise.
static bool isKeywordWithCondition(const FormatToken &Tok) {
  return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
                     tok::kw_constexpr, tok::kw_catch);
}

/// A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// stores parenthesis levels. It also tries to resolve matching "<" and ">"
/// into template parameter lists.
class AnnotatingParser { public: AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, const AdditionalKeywords &Keywords) : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false), Keywords(Keywords) { Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); resetTokenMetadata(); } private: bool parseAngle() { if (!CurrentToken || !CurrentToken->Previous) return false; if (NonTemplateLess.count(CurrentToken->Previous)) return false; const FormatToken &Previous = *CurrentToken->Previous; // The '<'. if (Previous.Previous) { if (Previous.Previous->Tok.isLiteral()) return false; if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 && (!Previous.Previous->MatchingParen || !Previous.Previous->MatchingParen->is( TT_OverloadedOperatorLParen))) { return false; } } FormatToken *Left = CurrentToken->Previous; Left->ParentBracket = Contexts.back().ContextKind; ScopedContextCreator ContextCreator(*this, tok::less, 12); // If this angle is in the context of an expression, we need to be more // hesitant to detect it as opening template parameters. bool InExprContext = Contexts.back().IsExpression; Contexts.back().IsExpression = false; // If there's a template keyword before the opening angle bracket, this is a // template parameter, not an argument. if (Left->Previous && Left->Previous->isNot(tok::kw_template)) Contexts.back().ContextType = Context::TemplateArgument; if (Style.Language == FormatStyle::LK_Java && CurrentToken->is(tok::question)) { next(); } while (CurrentToken) { if (CurrentToken->is(tok::greater)) { // Try to do a better job at looking for ">>" within the condition of // a statement. Conservatively insert spaces between consecutive ">" // tokens to prevent splitting right bitshift operators and potentially // altering program semantics. This check is overly conservative and // will prevent spaces from being inserted in select nested template // parameter cases, but should not alter program semantics. if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) && Left->ParentBracket != tok::less && (isKeywordWithCondition(*Line.First) || CurrentToken->getStartOfNonWhitespace() == CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset( -1))) { return false; } Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // In TT_Proto, we must distignuish between: // map // msg < item: data > // msg: < item: data > // In TT_TextProto, map does not occur. if (Style.Language == FormatStyle::LK_TextProto || (Style.Language == FormatStyle::LK_Proto && Left->Previous && Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { CurrentToken->setType(TT_DictLiteral); } else { CurrentToken->setType(TT_TemplateCloser); } next(); return true; } if (CurrentToken->is(tok::question) && Style.Language == FormatStyle::LK_Java) { next(); continue; } if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) || (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext && !Style.isCSharp() && Style.Language != FormatStyle::LK_Proto && Style.Language != FormatStyle::LK_TextProto)) { return false; } // If a && or || is found and interpreted as a binary operator, this set // of angles is likely part of something like "a < b && c > d". If the // angles are inside an expression, the ||/&& might also be a binary // operator that was misinterpreted because we are parsing template // parameters. // FIXME: This is getting out of hand, write a decent parser. 
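      // Illustrative example of the ambiguity this check guards against:
      //   bool x = a < b && c > d;  // '<' and '>' are comparison operators
      //   Foo<T && U> y;            // '<' and '>' delimit template arguments
      // A binary '&&'/'||' inside a suspected angle pair, while we are in an
      // expression context, makes the template interpretation unlikely.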
if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && CurrentToken->Previous->is(TT_BinaryOperator) && Contexts[Contexts.size() - 2].IsExpression && !Line.startsWith(tok::kw_template)) { return false; } updateParameterCount(Left, CurrentToken); if (Style.Language == FormatStyle::LK_Proto) { if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) { if (CurrentToken->is(tok::colon) || (CurrentToken->isOneOf(tok::l_brace, tok::less) && Previous->isNot(tok::colon))) { Previous->setType(TT_SelectorName); } } } if (!consumeToken()) return false; } return false; } bool parseUntouchableParens() { while (CurrentToken) { CurrentToken->Finalized = true; switch (CurrentToken->Tok.getKind()) { case tok::l_paren: next(); if (!parseUntouchableParens()) return false; continue; case tok::r_paren: next(); return true; default: // no-op break; } next(); } return false; } bool parseParens(bool LookForDecls = false) { if (!CurrentToken) return false; assert(CurrentToken->Previous && "Unknown previous token"); FormatToken &OpeningParen = *CurrentToken->Previous; assert(OpeningParen.is(tok::l_paren)); FormatToken *PrevNonComment = OpeningParen.getPreviousNonComment(); OpeningParen.ParentBracket = Contexts.back().ContextKind; ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); // FIXME: This is a bit of a hack. Do better. Contexts.back().ColonIsForRangeExpr = Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; if (OpeningParen.Previous && OpeningParen.Previous->is(TT_UntouchableMacroFunc)) { OpeningParen.Finalized = true; return parseUntouchableParens(); } bool StartsObjCMethodExpr = false; if (FormatToken *MaybeSel = OpeningParen.Previous) { // @selector( starts a selector. if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) { StartsObjCMethodExpr = true; } } if (OpeningParen.is(TT_OverloadedOperatorLParen)) { // Find the previous kw_operator token. FormatToken *Prev = &OpeningParen; while (!Prev->is(tok::kw_operator)) { Prev = Prev->Previous; assert(Prev && "Expect a kw_operator prior to the OperatorLParen!"); } // If faced with "a.operator*(argument)" or "a->operator*(argument)", // i.e. the operator is called as a member function, // then the argument must be an expression. bool OperatorCalledAsMemberFunction = Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow); Contexts.back().IsExpression = OperatorCalledAsMemberFunction; } else if (Style.isJavaScript() && (Line.startsWith(Keywords.kw_type, tok::identifier) || Line.startsWith(tok::kw_export, Keywords.kw_type, tok::identifier))) { // type X = (...); // export type X = (...); Contexts.back().IsExpression = false; } else if (OpeningParen.Previous && (OpeningParen.Previous->isOneOf(tok::kw_static_assert, tok::kw_while, tok::l_paren, tok::comma, TT_BinaryOperator) || OpeningParen.Previous->isIf())) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; } else if (Style.isJavaScript() && OpeningParen.Previous && (OpeningParen.Previous->is(Keywords.kw_function) || (OpeningParen.Previous->endsSequence(tok::identifier, Keywords.kw_function)))) { // function(...) or function f(...) Contexts.back().IsExpression = false; } else if (Style.isJavaScript() && OpeningParen.Previous && OpeningParen.Previous->is(TT_JsTypeColon)) { // let x: (SomeType); Contexts.back().IsExpression = false; } else if (isLambdaParameterList(&OpeningParen)) { // This is a parameter list of a lambda expression. 
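      // Illustrative example: in
      //   auto f = [&total]<typename T>(T v) { total += v; };
      // the '(' follows the ']' of the capture list (or the '>' of an explicit
      // template parameter list), so it opens the lambda's parameter list
      // rather than a function call.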
Contexts.back().IsExpression = false; } else if (Line.InPPDirective && (!OpeningParen.Previous || !OpeningParen.Previous->is(tok::identifier))) { Contexts.back().IsExpression = true; } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. Contexts.back().IsExpression = false; } else if (OpeningParen.Previous && OpeningParen.Previous->is(TT_ForEachMacro)) { // The first argument to a foreach macro is a declaration. Contexts.back().ContextType = Context::ForEachMacro; Contexts.back().IsExpression = false; } else if (OpeningParen.Previous && OpeningParen.Previous->MatchingParen && OpeningParen.Previous->MatchingParen->is(TT_ObjCBlockLParen)) { Contexts.back().IsExpression = false; } else if (!Line.MustBeDeclaration && !Line.InPPDirective) { bool IsForOrCatch = OpeningParen.Previous && OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch); Contexts.back().IsExpression = !IsForOrCatch; } // Infer the role of the l_paren based on the previous token if we haven't // detected one one yet. if (PrevNonComment && OpeningParen.is(TT_Unknown)) { if (PrevNonComment->is(tok::kw___attribute)) { OpeningParen.setType(TT_AttributeParen); } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype, tok::kw_typeof, tok::kw__Atomic, tok::kw___underlying_type)) { OpeningParen.setType(TT_TypeDeclarationParen); // decltype() and typeof() usually contain expressions. if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof)) Contexts.back().IsExpression = true; } } if (StartsObjCMethodExpr) { Contexts.back().ColonIsObjCMethodExpr = true; OpeningParen.setType(TT_ObjCMethodExpr); } // MightBeFunctionType and ProbablyFunctionType are used for // function pointer and reference types as well as Objective-C // block types: // // void (*FunctionPointer)(void); // void (&FunctionReference)(void); // void (&&FunctionReference)(void); // void (^ObjCBlock)(void); bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression; bool ProbablyFunctionType = CurrentToken->isOneOf(tok::star, tok::amp, tok::ampamp, tok::caret); bool HasMultipleLines = false; bool HasMultipleParametersOnALine = false; bool MightBeObjCForRangeLoop = OpeningParen.Previous && OpeningParen.Previous->is(tok::kw_for); FormatToken *PossibleObjCForInToken = nullptr; while (CurrentToken) { // LookForDecls is set when "if (" has been seen. Check for // 'identifier' '*' 'identifier' followed by not '=' -- this // '*' has to be a binary operator but determineStarAmpUsage() will // categorize it as an unary operator, so set the right type here. 
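      // Illustrative example of the two cases:
      //   if (a *b > 0)        // 'a * b' is a multiplication, '*' is binary
      //   if (Foo *p = get())  // declaration with initializer, '*' is a pointer
      // The '=' after the second identifier is what separates the two.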
if (LookForDecls && CurrentToken->Next) { FormatToken *Prev = CurrentToken->getPreviousNonComment(); if (Prev) { FormatToken *PrevPrev = Prev->getPreviousNonComment(); FormatToken *Next = CurrentToken->Next; if (PrevPrev && PrevPrev->is(tok::identifier) && Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { Prev->setType(TT_BinaryOperator); LookForDecls = false; } } } if (CurrentToken->Previous->is(TT_PointerOrReference) && CurrentToken->Previous->Previous->isOneOf(tok::l_paren, tok::coloncolon)) { ProbablyFunctionType = true; } if (CurrentToken->is(tok::comma)) MightBeFunctionType = false; if (CurrentToken->Previous->is(TT_BinaryOperator)) Contexts.back().IsExpression = true; if (CurrentToken->is(tok::r_paren)) { if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next && (CurrentToken->Next->is(tok::l_paren) || (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration))) { OpeningParen.setType(OpeningParen.Next->is(tok::caret) ? TT_ObjCBlockLParen : TT_FunctionTypeLParen); } OpeningParen.MatchingParen = CurrentToken; CurrentToken->MatchingParen = &OpeningParen; if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) && OpeningParen.Previous && OpeningParen.Previous->is(tok::l_paren)) { // Detect the case where macros are used to generate lambdas or // function bodies, e.g.: // auto my_lambda = MACRO((Type *type, int i) { .. body .. }); for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken; Tok = Tok->Next) { if (Tok->is(TT_BinaryOperator) && Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) { Tok->setType(TT_PointerOrReference); } } } if (StartsObjCMethodExpr) { CurrentToken->setType(TT_ObjCMethodExpr); if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; } } if (OpeningParen.is(TT_AttributeParen)) CurrentToken->setType(TT_AttributeParen); if (OpeningParen.is(TT_TypeDeclarationParen)) CurrentToken->setType(TT_TypeDeclarationParen); if (OpeningParen.Previous && OpeningParen.Previous->is(TT_JavaAnnotation)) { CurrentToken->setType(TT_JavaAnnotation); } if (OpeningParen.Previous && OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) { CurrentToken->setType(TT_LeadingJavaAnnotation); } if (OpeningParen.Previous && OpeningParen.Previous->is(TT_AttributeSquare)) { CurrentToken->setType(TT_AttributeSquare); } if (!HasMultipleLines) OpeningParen.setPackingKind(PPK_Inconclusive); else if (HasMultipleParametersOnALine) OpeningParen.setPackingKind(PPK_BinPacked); else OpeningParen.setPackingKind(PPK_OnePerLine); next(); return true; } if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) return false; if (CurrentToken->is(tok::l_brace) && OpeningParen.is(TT_ObjCBlockLParen)) OpeningParen.setType(TT_Unknown); if (CurrentToken->is(tok::comma) && CurrentToken->Next && !CurrentToken->Next->HasUnescapedNewline && !CurrentToken->Next->isTrailingComment()) { HasMultipleParametersOnALine = true; } bool ProbablyFunctionTypeLParen = (CurrentToken->is(tok::l_paren) && CurrentToken->Next && CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret)); if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) || CurrentToken->Previous->isSimpleTypeSpecifier()) && !(CurrentToken->is(tok::l_brace) || (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) { Contexts.back().IsExpression = false; } if (CurrentToken->isOneOf(tok::semi, tok::colon)) { 
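        // Illustrative example: in
        //   for (NSString *key in dictionary)
        // 'in' is annotated TT_ObjCForIn; seeing a ';' or ':' first means the
        // header is not an ObjC for...in loop, so any tentative TT_ObjCForIn
        // annotation is undone.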
MightBeObjCForRangeLoop = false; if (PossibleObjCForInToken) { PossibleObjCForInToken->setType(TT_Unknown); PossibleObjCForInToken = nullptr; } } if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) { PossibleObjCForInToken = CurrentToken; PossibleObjCForInToken->setType(TT_ObjCForIn); } // When we discover a 'new', we set CanBeExpression to 'false' in order to // parse the type correctly. Reset that after a comma. if (CurrentToken->is(tok::comma)) Contexts.back().CanBeExpression = true; FormatToken *Tok = CurrentToken; if (!consumeToken()) return false; updateParameterCount(&OpeningParen, Tok); if (CurrentToken && CurrentToken->HasUnescapedNewline) HasMultipleLines = true; } return false; } bool isCSharpAttributeSpecifier(const FormatToken &Tok) { if (!Style.isCSharp()) return false; // `identifier[i]` is not an attribute. if (Tok.Previous && Tok.Previous->is(tok::identifier)) return false; // Chains of [] in `identifier[i][j][k]` are not attributes. if (Tok.Previous && Tok.Previous->is(tok::r_square)) { auto *MatchingParen = Tok.Previous->MatchingParen; if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare)) return false; } const FormatToken *AttrTok = Tok.Next; if (!AttrTok) return false; // Just an empty declaration e.g. string []. if (AttrTok->is(tok::r_square)) return false; // Move along the tokens inbetween the '[' and ']' e.g. [STAThread]. while (AttrTok && AttrTok->isNot(tok::r_square)) AttrTok = AttrTok->Next; if (!AttrTok) return false; // Allow an attribute to be the only content of a file. AttrTok = AttrTok->Next; if (!AttrTok) return true; // Limit this to being an access modifier that follows. if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected, tok::comment, tok::kw_class, tok::kw_static, tok::l_square, Keywords.kw_internal)) { return true; } // incase its a [XXX] retval func(.... if (AttrTok->Next && AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) { return true; } return false; } bool isCpp11AttributeSpecifier(const FormatToken &Tok) { if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square)) return false; // The first square bracket is part of an ObjC array literal if (Tok.Previous && Tok.Previous->is(tok::at)) return false; const FormatToken *AttrTok = Tok.Next->Next; if (!AttrTok) return false; // C++17 '[[using ns: foo, bar(baz, blech)]]' // We assume nobody will name an ObjC variable 'using'. if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon)) return true; if (AttrTok->isNot(tok::identifier)) return false; while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) { // ObjC message send. We assume nobody will use : in a C++11 attribute // specifier parameter, although this is technically valid: // [[foo(:)]]. if (AttrTok->is(tok::colon) || AttrTok->startsSequence(tok::identifier, tok::identifier) || AttrTok->startsSequence(tok::r_paren, tok::identifier)) { return false; } if (AttrTok->is(tok::ellipsis)) return true; AttrTok = AttrTok->Next; } return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square); } bool parseSquare() { if (!CurrentToken) return false; // A '[' could be an index subscript (after an identifier or after // ')' or ']'), it could be the start of an Objective-C method // expression, it could the start of an Objective-C array literal, // or it could be a C++ attribute specifier [[foo::bar]]. 
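    // Illustrative examples of the cases above:
    //   buffer[i]                 // array subscript
    //   [receiver doSomething]    // Objective-C message send
    //   @[ @1, @2, @3 ]           // Objective-C array literal
    //   [[nodiscard]] int f();    // C++ attribute specifier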
FormatToken *Left = CurrentToken->Previous; Left->ParentBracket = Contexts.back().ContextKind; FormatToken *Parent = Left->getPreviousNonComment(); // Cases where '>' is followed by '['. // In C++, this can happen either in array of templates (foo[10]) // or when array is a nested template type (unique_ptr[]>). bool CppArrayTemplates = Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) && (Contexts.back().CanBeExpression || Contexts.back().IsExpression || Contexts.back().ContextType == Context::TemplateArgument); bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) || Contexts.back().InCpp11AttributeSpecifier; // Treat C# Attributes [STAThread] much like C++ attributes [[...]]. bool IsCSharpAttributeSpecifier = isCSharpAttributeSpecifier(*Left) || Contexts.back().InCSharpAttributeSpecifier; bool InsideInlineASM = Line.startsWith(tok::kw_asm); bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style); bool StartsObjCMethodExpr = !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates && Style.isCpp() && !IsCpp11AttributeSpecifier && !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && !CurrentToken->isOneOf(tok::l_brace, tok::r_square) && (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, tok::kw_return, tok::kw_throw) || Parent->isUnaryOperator() || // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) || (getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown)); bool ColonFound = false; unsigned BindingIncrease = 1; if (IsCppStructuredBinding) { Left->setType(TT_StructuredBindingLSquare); } else if (Left->is(TT_Unknown)) { if (StartsObjCMethodExpr) { Left->setType(TT_ObjCMethodExpr); } else if (InsideInlineASM) { Left->setType(TT_InlineASMSymbolicNameLSquare); } else if (IsCpp11AttributeSpecifier) { Left->setType(TT_AttributeSquare); } else if (Style.isJavaScript() && Parent && Contexts.back().ContextKind == tok::l_brace && Parent->isOneOf(tok::l_brace, tok::comma)) { Left->setType(TT_JsComputedPropertyName); } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace && Parent && Parent->isOneOf(tok::l_brace, tok::comma)) { Left->setType(TT_DesignatedInitializerLSquare); } else if (IsCSharpAttributeSpecifier) { Left->setType(TT_AttributeSquare); } else if (CurrentToken->is(tok::r_square) && Parent && Parent->is(TT_TemplateCloser)) { Left->setType(TT_ArraySubscriptLSquare); } else if (Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { // Square braces in LK_Proto can either be message field attributes: // // optional Aaa aaa = 1 [ // (aaa) = aaa // ]; // // extensions 123 [ // (aaa) = aaa // ]; // // or text proto extensions (in options): // // option (Aaa.options) = { // [type.type/type] { // key: value // } // } // // or repeated fields (in options): // // option (Aaa.options) = { // keys: [ 1, 2, 3 ] // } // // In the first and the third case we want to spread the contents inside // the square braces; in the second we want to keep them inline. 
Left->setType(TT_ArrayInitializerLSquare); if (!Left->endsSequence(tok::l_square, tok::numeric_constant, tok::equal) && !Left->endsSequence(tok::l_square, tok::numeric_constant, tok::identifier) && !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) { Left->setType(TT_ProtoExtensionLSquare); BindingIncrease = 10; } } else if (!CppArrayTemplates && Parent && Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, tok::comma, tok::l_paren, tok::l_square, tok::question, tok::colon, tok::kw_return, // Should only be relevant to JavaScript: tok::kw_default)) { Left->setType(TT_ArrayInitializerLSquare); } else { BindingIncrease = 10; Left->setType(TT_ArraySubscriptLSquare); } } ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease); Contexts.back().IsExpression = true; if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon)) Contexts.back().IsExpression = false; Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr; Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier; Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier; while (CurrentToken) { if (CurrentToken->is(tok::r_square)) { if (IsCpp11AttributeSpecifier) CurrentToken->setType(TT_AttributeSquare); if (IsCSharpAttributeSpecifier) { CurrentToken->setType(TT_AttributeSquare); } else if (((CurrentToken->Next && CurrentToken->Next->is(tok::l_paren)) || (CurrentToken->Previous && CurrentToken->Previous->Previous == Left)) && Left->is(TT_ObjCMethodExpr)) { // An ObjC method call is rarely followed by an open parenthesis. It // also can't be composed of just one token, unless it's a macro that // will be expanded to more tokens. // FIXME: Do we incorrectly label ":" with this? StartsObjCMethodExpr = false; Left->setType(TT_Unknown); } if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { CurrentToken->setType(TT_ObjCMethodExpr); // If we haven't seen a colon yet, make sure the last identifier // before the r_square is tagged as a selector name component. if (!ColonFound && CurrentToken->Previous && CurrentToken->Previous->is(TT_Unknown) && canBeObjCSelectorComponent(*CurrentToken->Previous)) { CurrentToken->Previous->setType(TT_SelectorName); } // determineStarAmpUsage() thinks that '*' '[' is allocating an // array of pointers, but if '[' starts a selector then '*' is a // binary operator. if (Parent && Parent->is(TT_PointerOrReference)) Parent->overwriteFixedType(TT_BinaryOperator); } // An arrow after an ObjC method expression is not a lambda arrow. if (CurrentToken->getType() == TT_ObjCMethodExpr && CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow)) { CurrentToken->Next->overwriteFixedType(TT_Unknown); } Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // FirstObjCSelectorName is set when a colon is found. This does // not work, however, when the method has no parameters. // Here, we set FirstObjCSelectorName when the end of the method call is // reached, in case it was not set already. 
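        // Illustrative example: in
        //   [object setWidth:w height:h]
        // 'setWidth' and 'height' are TT_SelectorName components. For a call
        // with no parameters, e.g. [object reload], there is no colon, so the
        // single component is only discovered here at the closing ']'.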
if (!Contexts.back().FirstObjCSelectorName) { FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (Previous && Previous->is(TT_SelectorName)) { Previous->ObjCSelectorNameParts = 1; Contexts.back().FirstObjCSelectorName = Previous; } } else { Left->ParameterCount = Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; } if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; if (Left->BlockParameterCount > 1) Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; } next(); return true; } if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) return false; if (CurrentToken->is(tok::colon)) { if (IsCpp11AttributeSpecifier && CurrentToken->endsSequence(tok::colon, tok::identifier, tok::kw_using)) { // Remember that this is a [[using ns: foo]] C++ attribute, so we // don't add a space before the colon (unlike other colons). CurrentToken->setType(TT_AttributeColon); } else if (Left->isOneOf(TT_ArraySubscriptLSquare, TT_DesignatedInitializerLSquare)) { Left->setType(TT_ObjCMethodExpr); StartsObjCMethodExpr = true; Contexts.back().ColonIsObjCMethodExpr = true; if (Parent && Parent->is(tok::r_paren)) { // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. Parent->setType(TT_CastRParen); } } ColonFound = true; } if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) && !ColonFound) { Left->setType(TT_ArrayInitializerLSquare); } FormatToken *Tok = CurrentToken; if (!consumeToken()) return false; updateParameterCount(Left, Tok); } return false; } bool couldBeInStructArrayInitializer() const { if (Contexts.size() < 2) return false; // We want to back up no more then 2 context levels i.e. // . { { <- const auto End = std::next(Contexts.rbegin(), 2); auto Last = Contexts.rbegin(); unsigned Depth = 0; for (; Last != End; ++Last) if (Last->ContextKind == tok::l_brace) ++Depth; return Depth == 2 && Last->ContextKind != tok::l_brace; } bool parseBrace() { if (!CurrentToken) return true; assert(CurrentToken->Previous); FormatToken &OpeningBrace = *CurrentToken->Previous; assert(OpeningBrace.is(tok::l_brace)); OpeningBrace.ParentBracket = Contexts.back().ContextKind; if (Contexts.back().CaretFound) OpeningBrace.overwriteFixedType(TT_ObjCBlockLBrace); Contexts.back().CaretFound = false; ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); Contexts.back().ColonIsDictLiteral = true; if (OpeningBrace.is(BK_BracedInit)) Contexts.back().IsExpression = true; if (Style.isJavaScript() && OpeningBrace.Previous && OpeningBrace.Previous->is(TT_JsTypeColon)) { Contexts.back().IsExpression = false; } unsigned CommaCount = 0; while (CurrentToken) { if (CurrentToken->is(tok::r_brace)) { assert(OpeningBrace.Optional == CurrentToken->Optional); OpeningBrace.MatchingParen = CurrentToken; CurrentToken->MatchingParen = &OpeningBrace; if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) { if (OpeningBrace.ParentBracket == tok::l_brace && couldBeInStructArrayInitializer() && CommaCount > 0) { Contexts.back().ContextType = Context::StructArrayInitializer; } } next(); return true; } if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) return false; updateParameterCount(&OpeningBrace, CurrentToken); if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) { FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (Previous->is(TT_JsTypeOptionalQuestion)) Previous = Previous->getPreviousNonComment(); if ((CurrentToken->is(tok::colon) && 
(!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { OpeningBrace.setType(TT_DictLiteral); if (Previous->Tok.getIdentifierInfo() || Previous->is(tok::string_literal)) { Previous->setType(TT_SelectorName); } } if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown)) OpeningBrace.setType(TT_DictLiteral); else if (Style.isJavaScript()) OpeningBrace.overwriteFixedType(TT_DictLiteral); } if (CurrentToken->is(tok::comma)) { if (Style.isJavaScript()) OpeningBrace.overwriteFixedType(TT_DictLiteral); ++CommaCount; } if (!consumeToken()) return false; } return true; } void updateParameterCount(FormatToken *Left, FormatToken *Current) { // For ObjC methods, the number of parameters is calculated differently as // method declarations have a different structure (the parameters are not // inside a bracket scope). if (Current->is(tok::l_brace) && Current->is(BK_Block)) ++Left->BlockParameterCount; if (Current->is(tok::comma)) { ++Left->ParameterCount; if (!Left->Role) Left->Role.reset(new CommaSeparatedList(Style)); Left->Role->CommaFound(Current); } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { Left->ParameterCount = 1; } } bool parseConditional() { while (CurrentToken) { if (CurrentToken->is(tok::colon)) { CurrentToken->setType(TT_ConditionalExpr); next(); return true; } if (!consumeToken()) return false; } return false; } bool parseTemplateDeclaration() { if (CurrentToken && CurrentToken->is(tok::less)) { CurrentToken->setType(TT_TemplateOpener); next(); if (!parseAngle()) return false; if (CurrentToken) CurrentToken->Previous->ClosesTemplateDeclaration = true; return true; } return false; } bool consumeToken() { FormatToken *Tok = CurrentToken; next(); switch (Tok->Tok.getKind()) { case tok::plus: case tok::minus: if (!Tok->Previous && Line.MustBeDeclaration) Tok->setType(TT_ObjCMethodSpecifier); break; case tok::colon: if (!Tok->Previous) return false; // Colons from ?: are handled in parseConditional(). 
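      // The ':' is disambiguated below by context; illustrative examples:
      //   let x: number;                // JS/TS type colon
      //   Foo(int x) : member_(x) {}    // constructor initializer colon
      //   class D : public B {};        // inheritance colon
      //   unsigned bits : 4;            // bit-field colon
      //   for (auto &v : values)        // range-based for loop colon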
if (Style.isJavaScript()) { if (Contexts.back().ColonIsForRangeExpr || // colon in for loop (Contexts.size() == 1 && // switch/case labels !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) || Contexts.back().ContextKind == tok::l_paren || // function params Contexts.back().ContextKind == tok::l_square || // array type (!Contexts.back().IsExpression && Contexts.back().ContextKind == tok::l_brace) || // object type (Contexts.size() == 1 && Line.MustBeDeclaration)) { // method/property declaration Contexts.back().IsExpression = false; Tok->setType(TT_JsTypeColon); break; } } else if (Style.isCSharp()) { if (Contexts.back().InCSharpAttributeSpecifier) { Tok->setType(TT_AttributeColon); break; } if (Contexts.back().ContextKind == tok::l_paren) { Tok->setType(TT_CSharpNamedArgumentColon); break; } } if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) || Line.First->startsSequence(tok::kw_export, Keywords.kw_module) || Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) { Tok->setType(TT_ModulePartitionColon); } else if (Contexts.back().ColonIsDictLiteral || Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { Tok->setType(TT_DictLiteral); if (Style.Language == FormatStyle::LK_TextProto) { if (FormatToken *Previous = Tok->getPreviousNonComment()) Previous->setType(TT_SelectorName); } } else if (Contexts.back().ColonIsObjCMethodExpr || Line.startsWith(TT_ObjCMethodSpecifier)) { Tok->setType(TT_ObjCMethodExpr); const FormatToken *BeforePrevious = Tok->Previous->Previous; // Ensure we tag all identifiers in method declarations as // TT_SelectorName. bool UnknownIdentifierInMethodDeclaration = Line.startsWith(TT_ObjCMethodSpecifier) && Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown); if (!BeforePrevious || // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. !(BeforePrevious->is(TT_CastRParen) || (BeforePrevious->is(TT_ObjCMethodExpr) && BeforePrevious->is(tok::colon))) || BeforePrevious->is(tok::r_square) || Contexts.back().LongestObjCSelectorName == 0 || UnknownIdentifierInMethodDeclaration) { Tok->Previous->setType(TT_SelectorName); if (!Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName = Tok->Previous; } else if (Tok->Previous->ColumnWidth > Contexts.back().LongestObjCSelectorName) { Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; } Tok->Previous->ParameterIndex = Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; } } else if (Contexts.back().ColonIsForRangeExpr) { Tok->setType(TT_RangeBasedForLoopColon); } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { Tok->setType(TT_BitFieldColon); } else if (Contexts.size() == 1 && !Line.First->isOneOf(tok::kw_enum, tok::kw_case, tok::kw_default)) { FormatToken *Prev = Tok->getPreviousNonComment(); if (!Prev) break; if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) || Prev->ClosesRequiresClause) { Tok->setType(TT_CtorInitializerColon); } else if (Prev->is(tok::kw_try)) { // Member initializer list within function try block. 
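        // Illustrative example:
        //   Foo::Foo(int x) try : member_(x) { ... } catch (...) { ... }
        // The ':' follows 'try', which itself follows the ')' of the parameter
        // list, yet it still introduces the constructor initializer list.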
FormatToken *PrevPrev = Prev->getPreviousNonComment(); if (!PrevPrev) break; if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept)) Tok->setType(TT_CtorInitializerColon); } else { Tok->setType(TT_InheritanceColon); } } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next && (Tok->Next->isOneOf(tok::r_paren, tok::comma) || (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next && Tok->Next->Next->is(tok::colon)))) { // This handles a special macro in ObjC code where selectors including // the colon are passed as macro arguments. Tok->setType(TT_ObjCMethodExpr); } else if (Contexts.back().ContextKind == tok::l_paren) { Tok->setType(TT_InlineASMColon); } break; case tok::pipe: case tok::amp: // | and & in declarations/type expressions represent union and // intersection types, respectively. if (Style.isJavaScript() && !Contexts.back().IsExpression) Tok->setType(TT_JsTypeOperator); break; case tok::kw_if: if (CurrentToken && CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) { next(); } LLVM_FALLTHROUGH; case tok::kw_while: if (CurrentToken && CurrentToken->is(tok::l_paren)) { next(); if (!parseParens(/*LookForDecls=*/true)) return false; } break; case tok::kw_for: if (Style.isJavaScript()) { // x.for and {for: ...} if ((Tok->Previous && Tok->Previous->is(tok::period)) || (Tok->Next && Tok->Next->is(tok::colon))) { break; } // JS' for await ( ... if (CurrentToken && CurrentToken->is(Keywords.kw_await)) next(); } if (Style.isCpp() && CurrentToken && CurrentToken->is(tok::kw_co_await)) next(); Contexts.back().ColonIsForRangeExpr = true; if (!CurrentToken || CurrentToken->isNot(tok::l_paren)) return false; next(); if (!parseParens()) return false; break; case tok::l_paren: // When faced with 'operator()()', the kw_operator handler incorrectly // marks the first l_paren as a OverloadedOperatorLParen. Here, we make // the first two parens OverloadedOperators and the second l_paren an // OverloadedOperatorLParen. if (Tok->Previous && Tok->Previous->is(tok::r_paren) && Tok->Previous->MatchingParen && Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) { Tok->Previous->setType(TT_OverloadedOperator); Tok->Previous->MatchingParen->setType(TT_OverloadedOperator); Tok->setType(TT_OverloadedOperatorLParen); } if (!parseParens()) return false; if (Line.MustBeDeclaration && Contexts.size() == 1 && !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) && !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen) && (!Tok->Previous || !Tok->Previous->isOneOf(tok::kw___attribute, TT_LeadingJavaAnnotation))) { Line.MightBeFunctionDecl = true; } break; case tok::l_square: if (!parseSquare()) return false; break; case tok::l_brace: if (Style.Language == FormatStyle::LK_TextProto) { FormatToken *Previous = Tok->getPreviousNonComment(); if (Previous && Previous->getType() != TT_DictLiteral) Previous->setType(TT_SelectorName); } if (!parseBrace()) return false; break; case tok::less: if (parseAngle()) { Tok->setType(TT_TemplateOpener); // In TT_Proto, we must distignuish between: // map // msg < item: data > // msg: < item: data > // In TT_TextProto, map does not occur. 
if (Style.Language == FormatStyle::LK_TextProto || (Style.Language == FormatStyle::LK_Proto && Tok->Previous && Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { Tok->setType(TT_DictLiteral); FormatToken *Previous = Tok->getPreviousNonComment(); if (Previous && Previous->getType() != TT_DictLiteral) Previous->setType(TT_SelectorName); } } else { Tok->setType(TT_BinaryOperator); NonTemplateLess.insert(Tok); CurrentToken = Tok; next(); } break; case tok::r_paren: case tok::r_square: return false; case tok::r_brace: // Lines can start with '}'. if (Tok->Previous) return false; break; case tok::greater: if (Style.Language != FormatStyle::LK_TextProto) Tok->setType(TT_BinaryOperator); if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser)) Tok->SpacesRequiredBefore = 1; break; case tok::kw_operator: if (Style.Language == FormatStyle::LK_TextProto || Style.Language == FormatStyle::LK_Proto) { break; } while (CurrentToken && !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { if (CurrentToken->isOneOf(tok::star, tok::amp)) CurrentToken->setType(TT_PointerOrReference); consumeToken(); if (CurrentToken && CurrentToken->is(tok::comma) && CurrentToken->Previous->isNot(tok::kw_operator)) { break; } if (CurrentToken && CurrentToken->Previous->isOneOf( TT_BinaryOperator, TT_UnaryOperator, tok::comma, tok::star, tok::arrow, tok::amp, tok::ampamp)) { CurrentToken->Previous->setType(TT_OverloadedOperator); } } if (CurrentToken && CurrentToken->is(tok::l_paren)) CurrentToken->setType(TT_OverloadedOperatorLParen); if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator)) CurrentToken->Previous->setType(TT_OverloadedOperator); break; case tok::question: if (Style.isJavaScript() && Tok->Next && Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren, tok::r_brace)) { // Question marks before semicolons, colons, etc. indicate optional // types (fields, parameters), e.g. // function(x?: string, y?) {...} // class X { y?; } Tok->setType(TT_JsTypeOptionalQuestion); break; } // Declarations cannot be conditional expressions, this can only be part // of a type declaration. if (Line.MustBeDeclaration && !Contexts.back().IsExpression && Style.isJavaScript()) { break; } if (Style.isCSharp()) { // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be // nullable types. // Line.MustBeDeclaration will be true for `Type? name;`. 
if ((!Contexts.back().IsExpression && Line.MustBeDeclaration) || (Tok->Next && Tok->Next->isOneOf(tok::r_paren, tok::greater)) || (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next && Tok->Next->Next->is(tok::equal))) { Tok->setType(TT_CSharpNullable); break; } } parseConditional(); break; case tok::kw_template: parseTemplateDeclaration(); break; case tok::comma: switch (Contexts.back().ContextType) { case Context::CtorInitializer: Tok->setType(TT_CtorInitializerComma); break; case Context::InheritanceList: Tok->setType(TT_InheritanceComma); break; default: if (Contexts.back().FirstStartOfName && (Contexts.size() == 1 || startsWithInitStatement(Line))) { Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; Line.IsMultiVariableDeclStmt = true; } break; } if (Contexts.back().ContextType == Context::ForEachMacro) Contexts.back().IsExpression = true; break; case tok::identifier: if (Tok->isOneOf(Keywords.kw___has_include, Keywords.kw___has_include_next)) { parseHasInclude(); } if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next && Tok->Next->isNot(tok::l_paren)) { Tok->setType(TT_CSharpGenericTypeConstraint); parseCSharpGenericTypeConstraint(); } break; case tok::arrow: if (Tok->isNot(TT_LambdaArrow) && Tok->Previous && Tok->Previous->is(tok::kw_noexcept)) { Tok->setType(TT_TrailingReturnArrow); } break; default: break; } return true; } void parseCSharpGenericTypeConstraint() { int OpenAngleBracketsCount = 0; while (CurrentToken) { if (CurrentToken->is(tok::less)) { // parseAngle is too greedy and will consume the whole line. CurrentToken->setType(TT_TemplateOpener); ++OpenAngleBracketsCount; next(); } else if (CurrentToken->is(tok::greater)) { CurrentToken->setType(TT_TemplateCloser); --OpenAngleBracketsCount; next(); } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) { // We allow line breaks after GenericTypeConstraintComma's // so do not flag commas in Generics as GenericTypeConstraintComma's. CurrentToken->setType(TT_CSharpGenericTypeConstraintComma); next(); } else if (CurrentToken->is(Keywords.kw_where)) { CurrentToken->setType(TT_CSharpGenericTypeConstraint); next(); } else if (CurrentToken->is(tok::colon)) { CurrentToken->setType(TT_CSharpGenericTypeConstraintColon); next(); } else { next(); } } } void parseIncludeDirective() { if (CurrentToken && CurrentToken->is(tok::less)) { next(); while (CurrentToken) { // Mark tokens up to the trailing line comments as implicit string // literals. if (CurrentToken->isNot(tok::comment) && !CurrentToken->TokenText.startswith("//")) { CurrentToken->setType(TT_ImplicitStringLiteral); } next(); } } } void parseWarningOrError() { next(); // We still want to format the whitespace left of the first token of the // warning or error. next(); while (CurrentToken) { CurrentToken->setType(TT_ImplicitStringLiteral); next(); } } void parsePragma() { next(); // Consume "pragma". if (CurrentToken && CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option, Keywords.kw_region)) { bool IsMark = CurrentToken->is(Keywords.kw_mark); next(); next(); // Consume first token (so we fix leading whitespace). 
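    // Illustrative example:
    //   #pragma mark - Helper methods
    // After 'mark', the remaining tokens are marked TT_ImplicitStringLiteral
    // so their spacing and punctuation are left untouched.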
while (CurrentToken) { if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator)) CurrentToken->setType(TT_ImplicitStringLiteral); next(); } } } void parseHasInclude() { if (!CurrentToken || !CurrentToken->is(tok::l_paren)) return; next(); // '(' parseIncludeDirective(); next(); // ')' } LineType parsePreprocessorDirective() { bool IsFirstToken = CurrentToken->IsFirst; LineType Type = LT_PreprocessorDirective; next(); if (!CurrentToken) return Type; if (Style.isJavaScript() && IsFirstToken) { // JavaScript files can contain shebang lines of the form: // #!/usr/bin/env node // Treat these like C++ #include directives. while (CurrentToken) { // Tokens cannot be comments here. CurrentToken->setType(TT_ImplicitStringLiteral); next(); } return LT_ImportStatement; } if (CurrentToken->is(tok::numeric_constant)) { CurrentToken->SpacesRequiredBefore = 1; return Type; } // Hashes in the middle of a line can lead to any strange token // sequence. if (!CurrentToken->Tok.getIdentifierInfo()) return Type; // In Verilog macro expansions start with a backtick just like preprocessor // directives. Thus we stop if the word is not a preprocessor directive. if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken)) return LT_Invalid; switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_include: case tok::pp_include_next: case tok::pp_import: next(); parseIncludeDirective(); Type = LT_ImportStatement; break; case tok::pp_error: case tok::pp_warning: parseWarningOrError(); break; case tok::pp_pragma: parsePragma(); break; case tok::pp_if: case tok::pp_elif: Contexts.back().IsExpression = true; next(); parseLine(); break; default: break; } while (CurrentToken) { FormatToken *Tok = CurrentToken; next(); if (Tok->is(tok::l_paren)) { parseParens(); } else if (Tok->isOneOf(Keywords.kw___has_include, Keywords.kw___has_include_next)) { parseHasInclude(); } } return Type; } public: LineType parseLine() { if (!CurrentToken) return LT_Invalid; NonTemplateLess.clear(); if (CurrentToken->is(tok::hash)) { // We were not yet allowed to use C++17 optional when this was being // written. So we used LT_Invalid to mark that the line is not a // preprocessor directive. auto Type = parsePreprocessorDirective(); if (Type != LT_Invalid) return Type; } // Directly allow to 'import ' to support protocol buffer // definitions (github.com/google/protobuf) or missing "#" (either way we // should not break the line). IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); if ((Style.Language == FormatStyle::LK_Java && CurrentToken->is(Keywords.kw_package)) || (Info && Info->getPPKeywordID() == tok::pp_import && CurrentToken->Next && CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier, tok::kw_static))) { next(); parseIncludeDirective(); return LT_ImportStatement; } // If this line starts and ends in '<' and '>', respectively, it is likely // part of "#define ". if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) { parseIncludeDirective(); return LT_ImportStatement; } // In .proto files, top-level options and package statements are very // similar to import statements and should not be line-wrapped. 
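  // Illustrative examples:
  //   package foo.bar.baz;
  //   option java_package = "com.foo.bar";
  // Both are treated like import statements, i.e. kept on a single line.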
if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 && CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) { next(); if (CurrentToken && CurrentToken->is(tok::identifier)) { while (CurrentToken) next(); return LT_ImportStatement; } } bool KeywordVirtualFound = false; bool ImportStatement = false; // import {...} from '...'; if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import)) ImportStatement = true; while (CurrentToken) { if (CurrentToken->is(tok::kw_virtual)) KeywordVirtualFound = true; if (Style.isJavaScript()) { // export {...} from '...'; // An export followed by "from 'some string';" is a re-export from // another module identified by a URI and is treated as a // LT_ImportStatement (i.e. prevent wraps on it for long URIs). // Just "export {...};" or "export class ..." should not be treated as // an import in this sense. if (Line.First->is(tok::kw_export) && CurrentToken->is(Keywords.kw_from) && CurrentToken->Next && CurrentToken->Next->isStringLiteral()) { ImportStatement = true; } if (isClosureImportStatement(*CurrentToken)) ImportStatement = true; } if (!consumeToken()) return LT_Invalid; } if (KeywordVirtualFound) return LT_VirtualFunctionDecl; if (ImportStatement) return LT_ImportStatement; if (Line.startsWith(TT_ObjCMethodSpecifier)) { if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; } return LT_ObjCMethodDecl; } for (const auto &ctx : Contexts) if (ctx.ContextType == Context::StructArrayInitializer) return LT_ArrayOfStructInitializer; return LT_Other; } private: bool isClosureImportStatement(const FormatToken &Tok) { // FIXME: Closure-library specific stuff should not be hard-coded but be // configurable. return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) && Tok.Next->Next && (Tok.Next->Next->TokenText == "module" || Tok.Next->Next->TokenText == "provide" || Tok.Next->Next->TokenText == "require" || Tok.Next->Next->TokenText == "requireType" || Tok.Next->Next->TokenText == "forwardDeclare") && Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren); } void resetTokenMetadata() { if (!CurrentToken) return; // Reset token type in case we have already looked at it and then // recovered from an error (e.g. failure to find the matching >). 
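    // Illustrative example: in
    //   bool b = a < b && c > d;
    // the '<' may first be tried as a template opener; once that parse fails,
    // the tokens after it are re-annotated here, except for the finalized and
    // structurally significant types listed below.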
if (!CurrentToken->isTypeFinalized() && !CurrentToken->isOneOf( TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro, TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow, TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator, TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral, TT_UntouchableMacroFunc, TT_StatementAttributeLikeMacro, TT_FunctionLikeOrFreestandingMacro, TT_ClassLBrace, TT_EnumLBrace, TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause, TT_RequiresClauseInARequiresExpression, TT_RequiresExpression, TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace, TT_CompoundRequirementLBrace, TT_BracedListLBrace)) { CurrentToken->setType(TT_Unknown); } CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; CurrentToken->FakeLParens.clear(); CurrentToken->FakeRParens = 0; } void next() { if (!CurrentToken) return; CurrentToken->NestingLevel = Contexts.size() - 1; CurrentToken->BindingStrength = Contexts.back().BindingStrength; modifyContext(*CurrentToken); determineTokenType(*CurrentToken); CurrentToken = CurrentToken->Next; resetTokenMetadata(); } /// A struct to hold information valid in a specific context, e.g. /// a pair of parenthesis. struct Context { Context(tok::TokenKind ContextKind, unsigned BindingStrength, bool IsExpression) : ContextKind(ContextKind), BindingStrength(BindingStrength), IsExpression(IsExpression) {} tok::TokenKind ContextKind; unsigned BindingStrength; bool IsExpression; unsigned LongestObjCSelectorName = 0; bool ColonIsForRangeExpr = false; bool ColonIsDictLiteral = false; bool ColonIsObjCMethodExpr = false; FormatToken *FirstObjCSelectorName = nullptr; FormatToken *FirstStartOfName = nullptr; bool CanBeExpression = true; bool CaretFound = false; bool InCpp11AttributeSpecifier = false; bool InCSharpAttributeSpecifier = false; enum { Unknown, // Like the part after `:` in a constructor. // Context(...) : IsExpression(IsExpression) CtorInitializer, // Like in the parentheses in a foreach. ForEachMacro, // Like the inheritance list in a class declaration. // class Input : public IO InheritanceList, // Like in the braced list. // int x[] = {}; StructArrayInitializer, // Like in `static_cast`. TemplateArgument, } ContextType = Unknown; }; /// Puts a new \c Context onto the stack \c Contexts for the lifetime /// of each instance. struct ScopedContextCreator { AnnotatingParser &P; ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, unsigned Increase) : P(P) { P.Contexts.push_back(Context(ContextKind, P.Contexts.back().BindingStrength + Increase, P.Contexts.back().IsExpression)); } ~ScopedContextCreator() { if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) { if (P.Contexts.back().ContextType == Context::StructArrayInitializer) { P.Contexts.pop_back(); P.Contexts.back().ContextType = Context::StructArrayInitializer; return; } } P.Contexts.pop_back(); } }; void modifyContext(const FormatToken &Current) { auto AssignmentStartsExpression = [&]() { if (Current.getPrecedence() != prec::Assignment) return false; if (Line.First->isOneOf(tok::kw_using, tok::kw_return)) return false; if (Line.First->is(tok::kw_template)) { assert(Current.Previous); if (Current.Previous->is(tok::kw_operator)) { // `template ... operator=` cannot be an expression. return false; } // `template` keyword can start a variable template. const FormatToken *Tok = Line.First->getNextNonComment(); assert(Tok); // Current token is on the same line. 
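      // Illustrative example: a variable template such as
      //   template <typename T> constexpr T zero = T(0);
      // has '<...>' right after 'template' and no class/struct/enum keyword
      // after the closing '>', so its '=' does start an expression.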
if (Tok->isNot(TT_TemplateOpener)) { // Explicit template instantiations do not have `<>`. return false; } Tok = Tok->MatchingParen; if (!Tok) return false; Tok = Tok->getNextNonComment(); if (!Tok) return false; if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_concept, tok::kw_struct, tok::kw_using)) { return false; } return true; } // Type aliases use `type X = ...;` in TypeScript and can be exported // using `export type ...`. if (Style.isJavaScript() && (Line.startsWith(Keywords.kw_type, tok::identifier) || Line.startsWith(tok::kw_export, Keywords.kw_type, tok::identifier))) { return false; } return !Current.Previous || Current.Previous->isNot(tok::kw_operator); }; if (AssignmentStartsExpression()) { Contexts.back().IsExpression = true; if (!Line.startsWith(TT_UnaryOperator)) { for (FormatToken *Previous = Current.Previous; Previous && Previous->Previous && !Previous->Previous->isOneOf(tok::comma, tok::semi); Previous = Previous->Previous) { if (Previous->isOneOf(tok::r_square, tok::r_paren)) { Previous = Previous->MatchingParen; if (!Previous) break; } if (Previous->opensScope()) break; if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && Previous->isOneOf(tok::star, tok::amp, tok::ampamp) && Previous->Previous && Previous->Previous->isNot(tok::equal)) { Previous->setType(TT_PointerOrReference); } } } } else if (Current.is(tok::lessless) && (!Current.Previous || !Current.Previous->is(tok::kw_operator))) { Contexts.back().IsExpression = true; } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { Contexts.back().IsExpression = true; } else if (Current.is(TT_TrailingReturnArrow)) { Contexts.back().IsExpression = false; } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) { Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java; } else if (Current.Previous && Current.Previous->is(TT_CtorInitializerColon)) { Contexts.back().IsExpression = true; Contexts.back().ContextType = Context::CtorInitializer; } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) { Contexts.back().ContextType = Context::InheritanceList; } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); Previous = Previous->Previous) { Previous->setType(TT_PointerOrReference); } if (Line.MustBeDeclaration && Contexts.front().ContextType != Context::CtorInitializer) { Contexts.back().IsExpression = false; } } else if (Current.is(tok::kw_new)) { Contexts.back().CanBeExpression = false; } else if (Current.is(tok::semi) || (Current.is(tok::exclaim) && Current.Previous && !Current.Previous->is(tok::kw_operator))) { // This should be the condition or increment in a for-loop. // But not operator !() (can't use TT_OverloadedOperator here as its not // been annotated yet). Contexts.back().IsExpression = true; } } static FormatToken *untilMatchingParen(FormatToken *Current) { // Used when `MatchingParen` is not yet established. int ParenLevel = 0; while (Current) { if (Current->is(tok::l_paren)) ++ParenLevel; if (Current->is(tok::r_paren)) --ParenLevel; if (ParenLevel < 1) break; Current = Current->Next; } return Current; } static bool isDeductionGuide(FormatToken &Current) { // Look for a deduction guide template A(...) -> A<...>; if (Current.Previous && Current.Previous->is(tok::r_paren) && Current.startsSequence(tok::arrow, tok::identifier, tok::less)) { // Find the TemplateCloser. 
FormatToken *TemplateCloser = Current.Next->Next; int NestingLevel = 0; while (TemplateCloser) { // Skip over an expressions in parens A<(3 < 2)>; if (TemplateCloser->is(tok::l_paren)) { // No Matching Paren yet so skip to matching paren TemplateCloser = untilMatchingParen(TemplateCloser); if (!TemplateCloser) break; } if (TemplateCloser->is(tok::less)) ++NestingLevel; if (TemplateCloser->is(tok::greater)) --NestingLevel; if (NestingLevel < 1) break; TemplateCloser = TemplateCloser->Next; } // Assuming we have found the end of the template ensure its followed // with a semi-colon. if (TemplateCloser && TemplateCloser->Next && TemplateCloser->Next->is(tok::semi) && Current.Previous->MatchingParen) { // Determine if the identifier `A` prior to the A<..>; is the same as // prior to the A(..) FormatToken *LeadingIdentifier = Current.Previous->MatchingParen->Previous; // Differentiate a deduction guide by seeing the // > of the template prior to the leading identifier. if (LeadingIdentifier) { FormatToken *PriorLeadingIdentifier = LeadingIdentifier->Previous; // Skip back past explicit decoration if (PriorLeadingIdentifier && PriorLeadingIdentifier->is(tok::kw_explicit)) { PriorLeadingIdentifier = PriorLeadingIdentifier->Previous; } return PriorLeadingIdentifier && (PriorLeadingIdentifier->is(TT_TemplateCloser) || PriorLeadingIdentifier->ClosesRequiresClause) && LeadingIdentifier->TokenText == Current.Next->TokenText; } } } return false; } void determineTokenType(FormatToken &Current) { if (!Current.is(TT_Unknown)) { // The token type is already known. return; } if ((Style.isJavaScript() || Style.isCSharp()) && Current.is(tok::exclaim)) { if (Current.Previous) { bool IsIdentifier = Style.isJavaScript() ? Keywords.IsJavaScriptIdentifier( *Current.Previous, /* AcceptIdentifierName= */ true) : Current.Previous->is(tok::identifier); if (IsIdentifier || Current.Previous->isOneOf( tok::kw_default, tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace, tok::kw_false, tok::kw_true, Keywords.kw_type, Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) || Current.Previous->Tok.isLiteral()) { Current.setType(TT_NonNullAssertion); return; } } if (Current.Next && Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) { Current.setType(TT_NonNullAssertion); return; } } // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. In this case, 'Current' is a // trailing token of this declaration and thus cannot be a name. if (Current.is(Keywords.kw_instanceof)) { Current.setType(TT_BinaryOperator); } else if (isStartOfName(Current) && (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { Contexts.back().FirstStartOfName = &Current; Current.setType(TT_StartOfName); } else if (Current.is(tok::semi)) { // Reset FirstStartOfName after finding a semicolon so that a for loop // with multiple increment statements is not confused with a for loop // having multiple variable declarations. 
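      // Illustrative example:
      //   for (int i = 0, j = n; i < j; ++i, --j)
      // The comma before the first ';' is part of a multi-variable
      // declaration; the comma after the second ';' merely separates
      // increment expressions.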
Contexts.back().FirstStartOfName = nullptr; } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) { AutoFound = true; } else if (Current.is(tok::arrow) && Style.Language == FormatStyle::LK_Java) { Current.setType(TT_LambdaArrow); } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && Current.NestingLevel == 0 && !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) { // not auto operator->() -> xxx; Current.setType(TT_TrailingReturnArrow); } else if (Current.is(tok::arrow) && Current.Previous && Current.Previous->is(tok::r_brace)) { // Concept implicit conversion constraint needs to be treated like // a trailing return type ... } -> . Current.setType(TT_TrailingReturnArrow); } else if (isDeductionGuide(Current)) { // Deduction guides trailing arrow " A(...) -> A;". Current.setType(TT_TrailingReturnArrow); } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { Current.setType(determineStarAmpUsage( Current, Contexts.back().CanBeExpression && Contexts.back().IsExpression, Contexts.back().ContextType == Context::TemplateArgument)); } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { Current.setType(determinePlusMinusCaretUsage(Current)); if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) Contexts.back().CaretFound = true; } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { Current.setType(determineIncrementUsage(Current)); } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { Current.setType(TT_UnaryOperator); } else if (Current.is(tok::question)) { if (Style.isJavaScript() && Line.MustBeDeclaration && !Contexts.back().IsExpression) { // In JavaScript, `interface X { foo?(): bar; }` is an optional method // on the interface, not a ternary expression. Current.setType(TT_JsTypeOptionalQuestion); } else { Current.setType(TT_ConditionalExpr); } } else if (Current.isBinaryOperator() && (!Current.Previous || Current.Previous->isNot(tok::l_square)) && (!Current.is(tok::greater) && Style.Language != FormatStyle::LK_TextProto)) { Current.setType(TT_BinaryOperator); } else if (Current.is(tok::comment)) { if (Current.TokenText.startswith("/*")) { if (Current.TokenText.endswith("*/")) { Current.setType(TT_BlockComment); } else { // The lexer has for some reason determined a comment here. But we // cannot really handle it, if it isn't properly terminated. Current.Tok.setKind(tok::unknown); } } else { Current.setType(TT_LineComment); } } else if (Current.is(tok::l_paren)) { if (lParenStartsCppCast(Current)) Current.setType(TT_CppCastLParen); } else if (Current.is(tok::r_paren)) { if (rParenEndsCast(Current)) Current.setType(TT_CastRParen); if (Current.MatchingParen && Current.Next && !Current.Next->isBinaryOperator() && !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace, tok::comma, tok::period, tok::arrow, tok::coloncolon)) { if (FormatToken *AfterParen = Current.MatchingParen->Next) { // Make sure this isn't the return type of an Obj-C block declaration if (AfterParen->isNot(tok::caret)) { if (FormatToken *BeforeParen = Current.MatchingParen->Previous) { if (BeforeParen->is(tok::identifier) && !BeforeParen->is(TT_TypenameMacro) && BeforeParen->TokenText == BeforeParen->TokenText.upper() && (!BeforeParen->Previous || BeforeParen->Previous->ClosesTemplateDeclaration)) { Current.setType(TT_FunctionAnnotationRParen); } } } } } } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() && Style.Language != FormatStyle::LK_Java) { // In Java & JavaScript, "@..." is a decorator or annotation. 
In ObjC, it // marks declarations and properties that need special formatting. switch (Current.Next->Tok.getObjCKeywordID()) { case tok::objc_interface: case tok::objc_implementation: case tok::objc_protocol: Current.setType(TT_ObjCDecl); break; case tok::objc_property: Current.setType(TT_ObjCProperty); break; default: break; } } else if (Current.is(tok::period)) { FormatToken *PreviousNoComment = Current.getPreviousNonComment(); if (PreviousNoComment && PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) { Current.setType(TT_DesignatedInitializerPeriod); } else if (Style.Language == FormatStyle::LK_Java && Current.Previous && Current.Previous->isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) { Current.setType(Current.Previous->getType()); } } else if (canBeObjCSelectorComponent(Current) && // FIXME(bug 36976): ObjC return types shouldn't use // TT_CastRParen. Current.Previous && Current.Previous->is(TT_CastRParen) && Current.Previous->MatchingParen && Current.Previous->MatchingParen->Previous && Current.Previous->MatchingParen->Previous->is( TT_ObjCMethodSpecifier)) { // This is the first part of an Objective-C selector name. (If there's no // colon after this, this is the only place which annotates the identifier // as a selector.) Current.setType(TT_SelectorName); } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept, tok::kw_requires) && Current.Previous && !Current.Previous->isOneOf(tok::equal, tok::at) && Line.MightBeFunctionDecl && Contexts.size() == 1) { // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. Current.setType(TT_TrailingAnnotation); } else if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && Current.Previous) { if (Current.Previous->is(tok::at) && Current.isNot(Keywords.kw_interface)) { const FormatToken &AtToken = *Current.Previous; const FormatToken *Previous = AtToken.getPreviousNonComment(); if (!Previous || Previous->is(TT_LeadingJavaAnnotation)) Current.setType(TT_LeadingJavaAnnotation); else Current.setType(TT_JavaAnnotation); } else if (Current.Previous->is(tok::period) && Current.Previous->isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) { Current.setType(Current.Previous->getType()); } } } /// Take a guess at whether \p Tok starts a name of a function or /// variable declaration. /// /// This is a heuristic based on whether \p Tok is an identifier following /// something that is likely a type. bool isStartOfName(const FormatToken &Tok) { if (Tok.isNot(tok::identifier) || !Tok.Previous) return false; if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof, Keywords.kw_as)) { return false; } if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in)) return false; // Skip "const" as it does not have an influence on whether this is a name. 
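// Illustrative example: 'x' is a start-of-name in both
//   const int x = 1;
//   int const x = 1;
// because any 'const' tokens directly before it are skipped below.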
FormatToken *PreviousNotConst = Tok.getPreviousNonComment(); // For javascript const can be like "let" or "var" if (!Style.isJavaScript()) while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) PreviousNotConst = PreviousNotConst->getPreviousNonComment(); if (!PreviousNotConst) return false; if (PreviousNotConst->ClosesRequiresClause) return false; bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && PreviousNotConst->Previous && PreviousNotConst->Previous->is(tok::hash); if (PreviousNotConst->is(TT_TemplateCloser)) { return PreviousNotConst && PreviousNotConst->MatchingParen && PreviousNotConst->MatchingParen->Previous && PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); } if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->is(TT_TypeDeclarationParen)) { return true; } // If is a preprocess keyword like #define. if (IsPPKeyword) return false; // int a or auto a. if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) return true; // *a or &a or &&a. if (PreviousNotConst->is(TT_PointerOrReference)) return true; // MyClass a; if (PreviousNotConst->isSimpleTypeSpecifier()) return true; // const a = in JavaScript. return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const); } /// Determine whether '(' is starting a C++ cast. bool lParenStartsCppCast(const FormatToken &Tok) { // C-style casts are only used in C++. if (!Style.isCpp()) return false; FormatToken *LeftOfParens = Tok.getPreviousNonComment(); if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) && LeftOfParens->MatchingParen) { auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment(); if (Prev && Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast, tok::kw_reinterpret_cast, tok::kw_static_cast)) { // FIXME: Maybe we should handle identifiers ending with "_cast", // e.g. any_cast? return true; } } return false; } /// Determine whether ')' is ending a cast. bool rParenEndsCast(const FormatToken &Tok) { // C-style casts are only used in C++, C# and Java. if (!Style.isCSharp() && !Style.isCpp() && Style.Language != FormatStyle::LK_Java) { return false; } // Empty parens aren't casts and there are no casts at the end of the line. if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen) return false; FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); if (LeftOfParens) { // If there is a closing parenthesis left of the current // parentheses, look past it as these might be chained casts. if (LeftOfParens->is(tok::r_paren) && LeftOfParens->isNot(TT_CastRParen)) { if (!LeftOfParens->MatchingParen || !LeftOfParens->MatchingParen->Previous) { return false; } LeftOfParens = LeftOfParens->MatchingParen->Previous; } if (LeftOfParens->is(tok::r_square)) { // delete[] (void *)ptr; auto MayBeArrayDelete = [](FormatToken *Tok) -> FormatToken * { if (Tok->isNot(tok::r_square)) return nullptr; Tok = Tok->getPreviousNonComment(); if (!Tok || Tok->isNot(tok::l_square)) return nullptr; Tok = Tok->getPreviousNonComment(); if (!Tok || Tok->isNot(tok::kw_delete)) return nullptr; return Tok; }; if (FormatToken *MaybeDelete = MayBeArrayDelete(LeftOfParens)) LeftOfParens = MaybeDelete; } // The Condition directly below this one will see the operator arguments // as a (void *foo) cast. 
// void operator delete(void *foo) ATTRIB; if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous && LeftOfParens->Previous->is(tok::kw_operator)) { return false; } // If there is an identifier (or with a few exceptions a keyword) right // before the parentheses, this is unlikely to be a cast. if (LeftOfParens->Tok.getIdentifierInfo() && !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case, tok::kw_delete)) { return false; } // Certain other tokens right before the parentheses are also signals that // this cannot be a cast. if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator, TT_TemplateCloser, tok::ellipsis)) { return false; } } if (Tok.Next->is(tok::question)) return false; // `foreach((A a, B b) in someList)` should not be seen as a cast. if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp()) return false; // Functions which end with decorations like volatile, noexcept are unlikely // to be casts. if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const, tok::kw_requires, tok::kw_throw, tok::arrow, Keywords.kw_override, Keywords.kw_final) || isCpp11AttributeSpecifier(*Tok.Next)) { return false; } // As Java has no function types, a "(" after the ")" likely means that this // is a cast. if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) return true; // If a (non-string) literal follows, this is likely a cast. if (Tok.Next->isNot(tok::string_literal) && (Tok.Next->Tok.isLiteral() || Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) { return true; } // Heuristically try to determine whether the parentheses contain a type. auto IsQualifiedPointerOrReference = [](FormatToken *T) { // This is used to handle cases such as x = (foo *const)&y; assert(!T->isSimpleTypeSpecifier() && "Should have already been checked"); // Strip trailing qualifiers such as const or volatile when checking // whether the parens could be a cast to a pointer/reference type. while (T) { if (T->is(TT_AttributeParen)) { // Handle `x = (foo *__attribute__((foo)))&v;`: if (T->MatchingParen && T->MatchingParen->Previous && T->MatchingParen->Previous->is(tok::kw___attribute)) { T = T->MatchingParen->Previous->Previous; continue; } } else if (T->is(TT_AttributeSquare)) { // Handle `x = (foo *[[clang::foo]])&v;`: if (T->MatchingParen && T->MatchingParen->Previous) { T = T->MatchingParen->Previous; continue; } } else if (T->canBePointerOrReferenceQualifier()) { T = T->Previous; continue; } break; } return T && T->is(TT_PointerOrReference); }; bool ParensAreType = !Tok.Previous || Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) || Tok.Previous->isSimpleTypeSpecifier() || IsQualifiedPointerOrReference(Tok.Previous); bool ParensCouldEndDecl = Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater); if (ParensAreType && !ParensCouldEndDecl) return true; // At this point, we heuristically assume that there are no casts at the // start of the line. We assume that we have found most cases where there // are by the logic above, e.g. "(void)x;". if (!LeftOfParens) return false; // Certain token types inside the parentheses mean that this can't be a // cast. for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok; Token = Token->Next) { if (Token->is(TT_BinaryOperator)) return false; } // If the following token is an identifier or 'this', this is a cast. All // cases where this can be something else are handled above. 
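// Illustrative example (type name is hypothetical): in
//   x = (SomeType)y;
// the token after the ')' is the identifier 'y', so the parens are
// annotated as a cast here.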
if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) return true; // Look for a cast `( x ) (`. if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) { if (Tok.Previous->is(tok::identifier) && Tok.Previous->Previous->is(tok::l_paren)) { return true; } } if (!Tok.Next->Next) return false; // If the next token after the parenthesis is a unary operator, assume // that this is cast, unless there are unexpected tokens inside the // parenthesis. bool NextIsUnary = Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star); if (!NextIsUnary || Tok.Next->is(tok::plus) || !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) { return false; } // Search for unexpected tokens. for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen; Prev = Prev->Previous) { if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) return false; } return true; } /// Returns true if the token is used as a unary operator. bool determineUnaryOperatorByUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken) return true; // These keywords are deliberately not included here because they may // precede only one of unary star/amp and plus/minus but not both. They are // either included in determineStarAmpUsage or determinePlusMinusCaretUsage. // // @ - It may be followed by a unary `-` in Objective-C literals. We don't // know how they can be followed by a star or amp. if (PrevToken->isOneOf( TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi, tok::equal, tok::question, tok::l_square, tok::l_brace, tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield, tok::kw_delete, tok::kw_return, tok::kw_throw)) { return true; } // We put sizeof here instead of only in determineStarAmpUsage. In the cases // where the unary `+` operator is overloaded, it is reasonable to write // things like `sizeof +x`. Like commit 446d6ec996c6c3. if (PrevToken->is(tok::kw_sizeof)) return true; // A sequence of leading unary operators. if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator)) return true; // There can't be two consecutive binary operators. if (PrevToken->is(TT_BinaryOperator)) return true; return false; } /// Return the type of the given token assuming it is * or &. TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, bool InTemplateArgument) { if (Style.isJavaScript()) return TT_BinaryOperator; // && in C# must be a binary operator. 
if (Style.isCSharp() && Tok.is(tok::ampamp)) return TT_BinaryOperator; const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken) return TT_UnaryOperator; const FormatToken *NextToken = Tok.getNextNonComment(); if (InTemplateArgument && NextToken && NextToken->is(tok::kw_noexcept)) return TT_BinaryOperator; if (!NextToken || NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_noexcept) || NextToken->canBePointerOrReferenceQualifier() || (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) { return TT_PointerOrReference; } if (PrevToken->is(tok::coloncolon)) return TT_PointerOrReference; if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen)) return TT_PointerOrReference; if (determineUnaryOperatorByUsage(Tok)) return TT_UnaryOperator; if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare)) return TT_PointerOrReference; if (NextToken->is(tok::kw_operator) && !IsExpression) return TT_PointerOrReference; if (NextToken->isOneOf(tok::comma, tok::semi)) return TT_PointerOrReference; // After right braces, star tokens are likely to be pointers to struct, // union, or class. // struct {} *ptr; // This by itself is not sufficient to distinguish from multiplication // following a brace-initialized expression, as in: // int i = int{42} * 2; // In the struct case, the part of the struct declaration until the `{` and // the `}` are put on separate unwrapped lines; in the brace-initialized // case, the matching `{` is on the same unwrapped line, so check for the // presence of the matching brace to distinguish between those. if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) && !PrevToken->MatchingParen) return TT_PointerOrReference; - // For "} &&" - if (PrevToken->is(tok::r_brace) && Tok.is(tok::ampamp)) { - const FormatToken *MatchingLBrace = PrevToken->MatchingParen; - - // We check whether there is a TemplateCloser(">") to indicate it's a - // template or not. If it's not a template, "&&" is likely a reference - // operator. - // struct {} &&ref = {}; - if (!MatchingLBrace) - return TT_PointerOrReference; - FormatToken *BeforeLBrace = MatchingLBrace->getPreviousNonComment(); - if (!BeforeLBrace || BeforeLBrace->isNot(TT_TemplateCloser)) - return TT_PointerOrReference; - - // If it is a template, "&&" is a binary operator. - // enable_if<>{} && ... - return TT_BinaryOperator; - } - if (PrevToken->Tok.isLiteral() || PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, tok::kw_false, tok::r_brace)) { return TT_BinaryOperator; } const FormatToken *NextNonParen = NextToken; while (NextNonParen && NextNonParen->is(tok::l_paren)) NextNonParen = NextNonParen->getNextNonComment(); if (NextNonParen && (NextNonParen->Tok.isLiteral() || NextNonParen->isOneOf(tok::kw_true, tok::kw_false) || NextNonParen->isUnaryOperator())) { return TT_BinaryOperator; } // If we know we're in a template argument, there are no named declarations. // Thus, having an identifier on the right-hand side indicates a binary // operator. if (InTemplateArgument && NextToken->Tok.isAnyIdentifier()) return TT_BinaryOperator; // "&&(" is quite unlikely to be two successive unary "&". 
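// Illustrative example:
//   if (ready &&(count > 0)) ...
// annotates '&&' as a binary operator even though no space follows it.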
if (Tok.is(tok::ampamp) && NextToken->is(tok::l_paren)) return TT_BinaryOperator; // This catches some cases where evaluation order is used as control flow: // aaa && aaa->f(); if (NextToken->Tok.isAnyIdentifier()) { const FormatToken *NextNextToken = NextToken->getNextNonComment(); if (NextNextToken && NextNextToken->is(tok::arrow)) return TT_BinaryOperator; } // It is very unlikely that we are going to find a pointer or reference type // definition on the RHS of an assignment. if (IsExpression && !Contexts.back().CaretFound) return TT_BinaryOperator; return TT_PointerOrReference; } TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { if (determineUnaryOperatorByUsage(Tok)) return TT_UnaryOperator; const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken) return TT_UnaryOperator; if (PrevToken->is(tok::at)) return TT_UnaryOperator; // Fall back to marking the token as binary operator. return TT_BinaryOperator; } /// Determine whether ++/-- are pre- or post-increments/-decrements. TokenType determineIncrementUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken || PrevToken->is(TT_CastRParen)) return TT_UnaryOperator; if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) return TT_TrailingUnaryOperator; return TT_UnaryOperator; } SmallVector Contexts; const FormatStyle &Style; AnnotatedLine &Line; FormatToken *CurrentToken; bool AutoFound; const AdditionalKeywords &Keywords; // Set of "<" tokens that do not open a template parameter list. If parseAngle // determines that a specific token can't be a template opener, it will make // same decision irrespective of the decisions for tokens leading up to it. // Store this information to prevent this from causing exponential runtime. llvm::SmallPtrSet NonTemplateLess; }; static const int PrecedenceUnaryOperator = prec::PointerToMember + 1; static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; /// Parses binary expressions by inserting fake parenthesis based on /// operator precedence. class ExpressionParser { public: ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, AnnotatedLine &Line) : Style(Style), Keywords(Keywords), Line(Line), Current(Line.First) {} /// Parse expressions with the given operator precedence. void parse(int Precedence = 0) { // Skip 'return' and ObjC selector colons as they are not part of a binary // expression. while (Current && (Current->is(tok::kw_return) || (Current->is(tok::colon) && Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) { next(); } if (!Current || Precedence > PrecedenceArrowAndPeriod) return; // Conditional expressions need to be parsed separately for proper nesting. if (Precedence == prec::Conditional) { parseConditionalExpr(); return; } // Parse unary operators, which all have a higher precedence than binary // operators. if (Precedence == PrecedenceUnaryOperator) { parseUnaryOperator(); return; } FormatToken *Start = Current; FormatToken *LatestOperator = nullptr; unsigned OperatorIndex = 0; while (Current) { // Consume operators with higher precedence. parse(Precedence + 1); int CurrentPrecedence = getCurrentPrecedence(); if (Precedence == CurrentPrecedence && Current && Current->is(TT_SelectorName)) { if (LatestOperator) addFakeParenthesis(Start, prec::Level(Precedence)); Start = Current; } // At the end of the line or when an operator with higher precedence is // found, insert fake parenthesis and return. 
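// Illustrative example: for
//   a = b + c;
// the additive level wraps 'b + c' in fake parens and then stops at the
// ';', whose precedence is lower.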
if (!Current || (Current->closesScope() && (Current->MatchingParen || Current->is(TT_TemplateString))) || (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || (CurrentPrecedence == prec::Conditional && Precedence == prec::Assignment && Current->is(tok::colon))) { break; } // Consume scopes: (), [], <> and {} // In addition to that, we handle requires clauses as a scope, so that the // constraints in them are correctly indented. if (Current->opensScope() || Current->isOneOf(TT_RequiresClause, TT_RequiresClauseInARequiresExpression)) { // A fragment of a JavaScript template string can look like '}..${' and // thus close a scope and open a new one at the same time. while (Current && (!Current->closesScope() || Current->opensScope())) { next(); parse(); } next(); } else { // Operator found. if (CurrentPrecedence == Precedence) { if (LatestOperator) LatestOperator->NextOperator = Current; LatestOperator = Current; Current->OperatorIndex = OperatorIndex; ++OperatorIndex; } next(/*SkipPastLeadingComments=*/Precedence > 0); } } if (LatestOperator && (Current || Precedence > 0)) { // Requires clauses do not necessarily end in a semicolon or a brace, but // can just run into a struct/class or a function declaration, so we need // to intervene so that the fake right paren is inserted correctly. auto End = (Start->Previous && Start->Previous->isOneOf(TT_RequiresClause, TT_RequiresClauseInARequiresExpression)) ? [this](){ auto Ret = Current ? Current : Line.Last; while (!Ret->ClosesRequiresClause && Ret->Previous) Ret = Ret->Previous; return Ret; }() : nullptr; if (Precedence == PrecedenceArrowAndPeriod) { // Call expressions don't have a binary operator precedence. addFakeParenthesis(Start, prec::Unknown, End); } else { addFakeParenthesis(Start, prec::Level(Precedence), End); } } } private: /// Gets the precedence (+1) of the given token for binary operators /// and other tokens that we treat like binary operators.
int getCurrentPrecedence() { if (Current) { const FormatToken *NextNonComment = Current->getNextNonComment(); if (Current->is(TT_ConditionalExpr)) return prec::Conditional; if (NextNonComment && Current->is(TT_SelectorName) && (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) || ((Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) && NextNonComment->is(tok::less)))) { return prec::Assignment; } if (Current->is(TT_JsComputedPropertyName)) return prec::Assignment; if (Current->is(TT_LambdaArrow)) return prec::Comma; if (Current->is(TT_FatArrow)) return prec::Assignment; if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) || (Current->is(tok::comment) && NextNonComment && NextNonComment->is(TT_SelectorName))) { return 0; } if (Current->is(TT_RangeBasedForLoopColon)) return prec::Comma; if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && Current->is(Keywords.kw_instanceof)) { return prec::Relational; } if (Style.isJavaScript() && Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) { return prec::Relational; } if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) return Current->getPrecedence(); if (Current->isOneOf(tok::period, tok::arrow)) return PrecedenceArrowAndPeriod; if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements, Keywords.kw_throws)) { return 0; } } return -1; } void addFakeParenthesis(FormatToken *Start, prec::Level Precedence, FormatToken *End = nullptr) { Start->FakeLParens.push_back(Precedence); if (Precedence > prec::Unknown) Start->StartsBinaryExpression = true; if (!End && Current) End = Current->getPreviousNonComment(); if (End) { ++End->FakeRParens; if (Precedence > prec::Unknown) End->EndsBinaryExpression = true; } } /// Parse unary operator expressions and surround them with fake /// parentheses if appropriate. void parseUnaryOperator() { llvm::SmallVector Tokens; while (Current && Current->is(TT_UnaryOperator)) { Tokens.push_back(Current); next(); } parse(PrecedenceArrowAndPeriod); for (FormatToken *Token : llvm::reverse(Tokens)) { // The actual precedence doesn't matter. addFakeParenthesis(Token, prec::Unknown); } } void parseConditionalExpr() { while (Current && Current->isTrailingComment()) next(); FormatToken *Start = Current; parse(prec::LogicalOr); if (!Current || !Current->is(tok::question)) return; next(); parse(prec::Assignment); if (!Current || Current->isNot(TT_ConditionalExpr)) return; next(); parse(prec::Assignment); addFakeParenthesis(Start, prec::Conditional); } void next(bool SkipPastLeadingComments = true) { if (Current) Current = Current->Next; while (Current && (Current->NewlinesBefore == 0 || SkipPastLeadingComments) && Current->isTrailingComment()) { Current = Current->Next; } } const FormatStyle &Style; const AdditionalKeywords &Keywords; const AnnotatedLine &Line; FormatToken *Current; }; } // end anonymous namespace void TokenAnnotator::setCommentLineLevels( SmallVectorImpl &Lines) const { const AnnotatedLine *NextNonCommentLine = nullptr; for (AnnotatedLine *Line : llvm::reverse(Lines)) { assert(Line->First); // If the comment is currently aligned with the line immediately following // it, that's probably intentional and we should keep it. 
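// Illustrative example:
//   // A comment describing f().
//   void f();
// The comment starts in the same column as 'void', so it keeps the level
// of the following line.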
if (NextNonCommentLine && Line->isComment() && NextNonCommentLine->First->NewlinesBefore <= 1 && NextNonCommentLine->First->OriginalColumn == Line->First->OriginalColumn) { // Align comments for preprocessor lines with the # in column 0 if // preprocessor lines are not indented. Otherwise, align with the next // line. Line->Level = (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && (NextNonCommentLine->Type == LT_PreprocessorDirective || NextNonCommentLine->Type == LT_ImportStatement)) ? 0 : NextNonCommentLine->Level; } else { NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr; } setCommentLineLevels(Line->Children); } } static unsigned maxNestingDepth(const AnnotatedLine &Line) { unsigned Result = 0; for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) Result = std::max(Result, Tok->NestingLevel); return Result; } void TokenAnnotator::annotate(AnnotatedLine &Line) const { for (auto &Child : Line.Children) annotate(*Child); AnnotatingParser Parser(Style, Line, Keywords); Line.Type = Parser.parseLine(); // With very deep nesting, ExpressionParser uses lots of stack and the // formatting algorithm is very slow. We're not going to do a good job here // anyway - it's probably generated code being formatted by mistake. // Just skip the whole line. if (maxNestingDepth(Line) > 50) Line.Type = LT_Invalid; if (Line.Type == LT_Invalid) return; ExpressionParser ExprParser(Style, Keywords, Line); ExprParser.parse(); if (Line.startsWith(TT_ObjCMethodSpecifier)) Line.Type = LT_ObjCMethodDecl; else if (Line.startsWith(TT_ObjCDecl)) Line.Type = LT_ObjCDecl; else if (Line.startsWith(TT_ObjCProperty)) Line.Type = LT_ObjCProperty; Line.First->SpacesRequiredBefore = 1; Line.First->CanBreakBefore = Line.First->MustBreakBefore; } // This function heuristically determines whether 'Current' starts the name of a // function declaration. static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, const AnnotatedLine &Line) { auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * { for (; Next; Next = Next->Next) { if (Next->is(TT_OverloadedOperatorLParen)) return Next; if (Next->is(TT_OverloadedOperator)) continue; if (Next->isOneOf(tok::kw_new, tok::kw_delete)) { // For 'new[]' and 'delete[]'. if (Next->Next && Next->Next->startsSequence(tok::l_square, tok::r_square)) { Next = Next->Next->Next; } continue; } if (Next->startsSequence(tok::l_square, tok::r_square)) { // For operator[](). Next = Next->Next; continue; } if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) && Next->Next && Next->Next->isOneOf(tok::star, tok::amp, tok::ampamp)) { // For operator void*(), operator char*(), operator Foo*(). Next = Next->Next; continue; } if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { Next = Next->MatchingParen; continue; } break; } return nullptr; }; // Find parentheses of parameter list. 
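// Illustrative example (names are hypothetical): in
//   A operator+(const A &lhs, const A &rhs);
// the name starts at 'operator' and we skip over '+' to reach the '(' of
// the parameter list.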
const FormatToken *Next = Current.Next; if (Current.is(tok::kw_operator)) { if (Current.Previous && Current.Previous->is(tok::coloncolon)) return false; Next = skipOperatorName(Next); } else { if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0) return false; for (; Next; Next = Next->Next) { if (Next->is(TT_TemplateOpener)) { Next = Next->MatchingParen; } else if (Next->is(tok::coloncolon)) { Next = Next->Next; if (!Next) return false; if (Next->is(tok::kw_operator)) { Next = skipOperatorName(Next->Next); break; } if (!Next->is(tok::identifier)) return false; } else if (Next->is(tok::l_paren)) { break; } else { return false; } } } // Check whether parameter list can belong to a function declaration. if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen) return false; // If the lines ends with "{", this is likely a function definition. if (Line.Last->is(tok::l_brace)) return true; if (Next->Next == Next->MatchingParen) return true; // Empty parentheses. // If there is an &/&& after the r_paren, this is likely a function. if (Next->MatchingParen->Next && Next->MatchingParen->Next->is(TT_PointerOrReference)) { return true; } // Check for K&R C function definitions (and C++ function definitions with // unnamed parameters), e.g.: // int f(i) // { // return i + 1; // } // bool g(size_t = 0, bool b = false) // { // return !b; // } if (IsCpp && Next->Next && Next->Next->is(tok::identifier) && !Line.endsWith(tok::semi)) { return true; } for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; Tok = Tok->Next) { if (Tok->is(TT_TypeDeclarationParen)) return true; if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) { Tok = Tok->MatchingParen; continue; } if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) { return true; } if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) || Tok->Tok.isLiteral()) { return false; } } return false; } bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const { assert(Line.MightBeFunctionDecl); if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel || Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevelDefinitions) && Line.Level > 0) { return false; } switch (Style.AlwaysBreakAfterReturnType) { case FormatStyle::RTBS_None: return false; case FormatStyle::RTBS_All: case FormatStyle::RTBS_TopLevel: return true; case FormatStyle::RTBS_AllDefinitions: case FormatStyle::RTBS_TopLevelDefinitions: return Line.mightBeFunctionDefinition(); } return false; } void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { for (AnnotatedLine *ChildLine : Line.Children) calculateFormattingInformation(*ChildLine); Line.First->TotalLength = Line.First->IsMultiline ? Style.ColumnLimit : Line.FirstStartColumn + Line.First->ColumnWidth; FormatToken *Current = Line.First->Next; bool InFunctionDecl = Line.MightBeFunctionDecl; bool AlignArrayOfStructures = (Style.AlignArrayOfStructures != FormatStyle::AIAS_None && Line.Type == LT_ArrayOfStructInitializer); if (AlignArrayOfStructures) calculateArrayInitializerColumnList(Line); while (Current) { if (isFunctionDeclarationName(Style.isCpp(), *Current, Line)) Current->setType(TT_FunctionDeclarationName); const FormatToken *Prev = Current->Previous; if (Current->is(TT_LineComment)) { if (Prev->is(BK_BracedInit) && Prev->opensScope()) { Current->SpacesRequiredBefore = (Style.Cpp11BracedListStyle && !Style.SpacesInParentheses) ? 
0 : 1; } else { Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; } // If we find a trailing comment, iterate backwards to determine whether // it seems to relate to a specific parameter. If so, break before that // parameter to avoid changing the comment's meaning. E.g. don't move 'b' // to the previous line in: // SomeFunction(a, // b, // comment // c); if (!Current->HasUnescapedNewline) { for (FormatToken *Parameter = Current->Previous; Parameter; Parameter = Parameter->Previous) { if (Parameter->isOneOf(tok::comment, tok::r_brace)) break; if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { if (!Parameter->Previous->is(TT_CtorInitializerComma) && Parameter->HasUnescapedNewline) { Parameter->MustBreakBefore = true; } break; } } } } else if (Current->SpacesRequiredBefore == 0 && spaceRequiredBefore(Line, *Current)) { Current->SpacesRequiredBefore = 1; } const auto &Children = Prev->Children; if (!Children.empty() && Children.back()->Last->is(TT_LineComment)) { Current->MustBreakBefore = true; } else { Current->MustBreakBefore = Current->MustBreakBefore || mustBreakBefore(Line, *Current); if (!Current->MustBreakBefore && InFunctionDecl && Current->is(TT_FunctionDeclarationName)) { Current->MustBreakBefore = mustBreakForReturnType(Line); } } Current->CanBreakBefore = Current->MustBreakBefore || canBreakBefore(Line, *Current); unsigned ChildSize = 0; if (Prev->Children.size() == 1) { FormatToken &LastOfChild = *Prev->Children[0]->Last; ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit : LastOfChild.TotalLength + 1; } if (Current->MustBreakBefore || Prev->Children.size() > 1 || (Prev->Children.size() == 1 && Prev->Children[0]->First->MustBreakBefore) || Current->IsMultiline) { Current->TotalLength = Prev->TotalLength + Style.ColumnLimit; } else { Current->TotalLength = Prev->TotalLength + Current->ColumnWidth + ChildSize + Current->SpacesRequiredBefore; } if (Current->is(TT_CtorInitializerColon)) InFunctionDecl = false; // FIXME: Only calculate this if CanBreakBefore is true once static // initializers etc. are sorted out. // FIXME: Move magic numbers to a better place. // Reduce penalty for aligning ObjC method arguments using the colon // alignment as this is the canonical way (still prefer fitting everything // into one line if possible). Trying to fit a whole expression into one // line should not force other line breaks (e.g. when ObjC method // expression is a part of other expression). 
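// Illustrative example: ObjC arguments are canonically aligned on the
// selector colons, e.g.
//   [object setFirstValue:aValue
//             secondValue:anotherValue];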
Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl); if (Style.Language == FormatStyle::LK_ObjC && Current->is(TT_SelectorName) && Current->ParameterIndex > 0) { if (Current->ParameterIndex == 1) Current->SplitPenalty += 5 * Current->BindingStrength; } else { Current->SplitPenalty += 20 * Current->BindingStrength; } Current = Current->Next; } calculateUnbreakableTailLengths(Line); unsigned IndentLevel = Line.Level; for (Current = Line.First; Current != nullptr; Current = Current->Next) { if (Current->Role) Current->Role->precomputeFormattingInfos(Current); if (Current->MatchingParen && Current->MatchingParen->opensBlockOrBlockTypeList(Style) && IndentLevel > 0) { --IndentLevel; } Current->IndentLevel = IndentLevel; if (Current->opensBlockOrBlockTypeList(Style)) ++IndentLevel; } LLVM_DEBUG({ printDebugInfo(Line); }); } void TokenAnnotator::calculateUnbreakableTailLengths( AnnotatedLine &Line) const { unsigned UnbreakableTailLength = 0; FormatToken *Current = Line.Last; while (Current) { Current->UnbreakableTailLength = UnbreakableTailLength; if (Current->CanBreakBefore || Current->isOneOf(tok::comment, tok::string_literal)) { UnbreakableTailLength = 0; } else { UnbreakableTailLength += Current->ColumnWidth + Current->SpacesRequiredBefore; } Current = Current->Previous; } } void TokenAnnotator::calculateArrayInitializerColumnList( AnnotatedLine &Line) const { if (Line.First == Line.Last) return; auto *CurrentToken = Line.First; CurrentToken->ArrayInitializerLineStart = true; unsigned Depth = 0; while (CurrentToken != nullptr && CurrentToken != Line.Last) { if (CurrentToken->is(tok::l_brace)) { CurrentToken->IsArrayInitializer = true; if (CurrentToken->Next != nullptr) CurrentToken->Next->MustBreakBefore = true; CurrentToken = calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1); } else { CurrentToken = CurrentToken->Next; } } } FormatToken *TokenAnnotator::calculateInitializerColumnList( AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) const { while (CurrentToken != nullptr && CurrentToken != Line.Last) { if (CurrentToken->is(tok::l_brace)) ++Depth; else if (CurrentToken->is(tok::r_brace)) --Depth; if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) { CurrentToken = CurrentToken->Next; if (CurrentToken == nullptr) break; CurrentToken->StartsColumn = true; CurrentToken = CurrentToken->Previous; } CurrentToken = CurrentToken->Next; } return CurrentToken; } unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, bool InFunctionDecl) const { const FormatToken &Left = *Tok.Previous; const FormatToken &Right = Tok; if (Left.is(tok::semi)) return 0; if (Style.Language == FormatStyle::LK_Java) { if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws)) return 1; if (Right.is(Keywords.kw_implements)) return 2; if (Left.is(tok::comma) && Left.NestingLevel == 0) return 3; } else if (Style.isJavaScript()) { if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma)) return 100; if (Left.is(TT_JsTypeColon)) return 35; if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) { return 100; } // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()". 
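// Illustrative example: prefer
//   someObject.veryLongMethodName()
//       .anotherCall();
// over breaking inside the empty '()'.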
if (Left.opensScope() && Right.closesScope()) return 200; } if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) return 1; if (Right.is(tok::l_square)) { if (Style.Language == FormatStyle::LK_Proto) return 1; if (Left.is(tok::r_square)) return 200; // Slightly prefer formatting local lambda definitions like functions. if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal)) return 35; if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, TT_ArrayInitializerLSquare, TT_DesignatedInitializerLSquare, TT_AttributeSquare)) { return 500; } } if (Left.is(tok::coloncolon) || (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) { return 500; } if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || Right.is(tok::kw_operator)) { if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) return 3; if (Left.is(TT_StartOfName)) return 110; if (InFunctionDecl && Right.NestingLevel == 0) return Style.PenaltyReturnTypeOnItsOwnLine; return 200; } if (Right.is(TT_PointerOrReference)) return 190; if (Right.is(TT_LambdaArrow)) return 110; if (Left.is(tok::equal) && Right.is(tok::l_brace)) return 160; if (Left.is(TT_CastRParen)) return 100; if (Left.isOneOf(tok::kw_class, tok::kw_struct)) return 5000; if (Left.is(tok::comment)) return 1000; if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, TT_CtorInitializerColon)) { return 2; } if (Right.isMemberAccess()) { // Breaking before the "./->" of a chained call/member access is reasonably // cheap, as formatting those with one call per line is generally // desirable. In particular, it should be cheaper to break before the call // than it is to break inside a call's parameters, which could lead to weird // "hanging" indents. The exception is the very last "./->" to support this // frequent pattern: // // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc( // dddddddd); // // which might otherwise be blown up onto many lines. Here, clang-format // won't produce "hanging" indents anyway as there is no other trailing // call. // // Also apply a higher penalty if this is not a call, as that might lead to // a wrapping like: // // aaaaaaa // .aaaaaaaaa.bbbbbbbb(cccccccc); return !Right.NextOperator || !Right.NextOperator->Previous->closesScope() ? 150 : 35; } if (Right.is(TT_TrailingAnnotation) && (!Right.Next || Right.Next->isNot(tok::l_paren))) { // Moving trailing annotations to the next line is fine for ObjC method // declarations. if (Line.startsWith(TT_ObjCMethodSpecifier)) return 10; // Generally, breaking before a trailing annotation is bad unless it is // function-like. It seems to be especially preferable to keep standard // annotations (i.e. "const", "final" and "override") on the same line. // Use a slightly higher penalty after ")" so that annotations like // "const override" are kept together. bool is_short_annotation = Right.TokenText.size() < 10; return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0); } // In for-loops, prefer breaking at ',' and ';'. if (Line.startsWith(tok::kw_for) && Left.is(tok::equal)) return 4; // In Objective-C method expressions, prefer breaking before "param:" over // breaking after it. if (Right.is(TT_SelectorName)) return 0; if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr)) return Line.MightBeFunctionDecl ? 50 : 500; // In Objective-C type declarations, avoid breaking after the category's // open paren (we'll prefer breaking after the protocol list's opening // angle bracket, if present).
if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous && Left.Previous->isOneOf(tok::identifier, tok::greater)) { return 500; } if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0) return Style.PenaltyBreakOpenParenthesis; if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { return 100; } if (Left.is(tok::l_paren) && Left.Previous && (Left.Previous->is(tok::kw_for) || Left.Previous->isIf())) { return 1000; } if (Left.is(tok::equal) && InFunctionDecl) return 110; if (Right.is(tok::r_brace)) return 1; if (Left.is(TT_TemplateOpener)) return 100; if (Left.opensScope()) { // If we aren't aligning after opening parens/braces we can always break // here unless the style does not want us to place all arguments on the // next line. if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign && (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) { return 0; } if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle) return 19; return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter : 19; } if (Left.is(TT_JavaAnnotation)) return 50; if (Left.is(TT_UnaryOperator)) return 60; if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous && Left.Previous->isLabelString() && (Left.NextOperator || Left.OperatorIndex != 0)) { return 50; } if (Right.is(tok::plus) && Left.isLabelString() && (Right.NextOperator || Right.OperatorIndex != 0)) { return 25; } if (Left.is(tok::comma)) return 1; if (Right.is(tok::lessless) && Left.isLabelString() && (Right.NextOperator || Right.OperatorIndex != 1)) { return 25; } if (Right.is(tok::lessless)) { // Breaking at a << is really cheap. if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0) { // Slightly prefer to break before the first one in log-like statements. 
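// Illustrative example: in
//   llvm::errs() << "progress: " << Done << "/" << Total;
// breaking before the first '<<' (right after the ')') is slightly cheaper
// than breaking before the later ones.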
return 2; } return 1; } if (Left.ClosesTemplateDeclaration) return Style.PenaltyBreakTemplateDeclaration; if (Left.ClosesRequiresClause) return 0; if (Left.is(TT_ConditionalExpr)) return prec::Conditional; prec::Level Level = Left.getPrecedence(); if (Level == prec::Unknown) Level = Right.getPrecedence(); if (Level == prec::Assignment) return Style.PenaltyBreakAssignment; if (Level != prec::Unknown) return Level; return 3; } bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const { if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always) return true; if (Right.is(TT_OverloadedOperatorLParen) && Style.SpaceBeforeParensOptions.AfterOverloadedOperator) { return true; } if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses && Right.ParameterCount > 0) { return true; } return false; } bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right) const { if (Left.is(tok::kw_return) && !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) { return true; } if (Style.isJson() && Left.is(tok::string_literal) && Right.is(tok::colon)) return false; if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java) return true; if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && Left.Tok.getObjCKeywordID() == tok::objc_property) { return true; } if (Right.is(tok::hashhash)) return Left.is(tok::hash); if (Left.isOneOf(tok::hashhash, tok::hash)) return Right.is(tok::hash); if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) || (Left.is(tok::l_brace) && Left.isNot(BK_Block) && Right.is(tok::r_brace) && Right.isNot(BK_Block))) { return Style.SpaceInEmptyParentheses; } if (Style.SpacesInConditionalStatement) { const FormatToken *LeftParen = nullptr; if (Left.is(tok::l_paren)) LeftParen = &Left; else if (Right.is(tok::r_paren) && Right.MatchingParen) LeftParen = Right.MatchingParen; if (LeftParen && LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous)) { return true; } } // auto{x} auto(x) if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace)) return false; // operator co_await(x) if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous && Left.Previous->is(tok::kw_operator)) { return false; } // co_await (x), co_yield (x), co_return (x) if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) && !Right.isOneOf(tok::semi, tok::r_paren)) { return true; } if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) { return (Right.is(TT_CastRParen) || (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) ? 
Style.SpacesInCStyleCastParentheses : Style.SpacesInParentheses; } if (Right.isOneOf(tok::semi, tok::comma)) return false; if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) { bool IsLightweightGeneric = Right.MatchingParen && Right.MatchingParen->Next && Right.MatchingParen->Next->is(tok::colon); return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList; } if (Right.is(tok::less) && Left.is(tok::kw_template)) return Style.SpaceAfterTemplateKeyword; if (Left.isOneOf(tok::exclaim, tok::tilde)) return false; if (Left.is(tok::at) && Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, tok::numeric_constant, tok::l_paren, tok::l_brace, tok::kw_true, tok::kw_false)) { return false; } if (Left.is(tok::colon)) return !Left.is(TT_ObjCMethodExpr); if (Left.is(tok::coloncolon)) return false; if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) { if (Style.Language == FormatStyle::LK_TextProto || (Style.Language == FormatStyle::LK_Proto && (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) { // Format empty list as `<>`. if (Left.is(tok::less) && Right.is(tok::greater)) return false; return !Style.Cpp11BracedListStyle; } return false; } if (Right.is(tok::ellipsis)) { return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous && Left.Previous->is(tok::kw_case)); } if (Left.is(tok::l_square) && Right.is(tok::amp)) return Style.SpacesInSquareBrackets; if (Right.is(TT_PointerOrReference)) { if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) { if (!Left.MatchingParen) return true; FormatToken *TokenBeforeMatchingParen = Left.MatchingParen->getPreviousNonComment(); if (!TokenBeforeMatchingParen || !Left.is(TT_TypeDeclarationParen)) return true; } // Add a space if the previous token is a pointer qualifier or the closing // parenthesis of __attribute__(()) expression and the style requires spaces // after pointer qualifiers. if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After || Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) && (Left.is(TT_AttributeParen) || Left.canBePointerOrReferenceQualifier())) { return true; } if (Left.Tok.isLiteral()) return true; // for (auto a = 0, b = 0; const auto & c : {1, 2, 3}) if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next && Right.Next->Next->is(TT_RangeBasedForLoopColon)) { return getTokenPointerOrReferenceAlignment(Right) != FormatStyle::PAS_Left; } return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) && (getTokenPointerOrReferenceAlignment(Right) != FormatStyle::PAS_Left || (Line.IsMultiVariableDeclStmt && (Left.NestingLevel == 0 || (Left.NestingLevel == 1 && startsWithInitStatement(Line))))); } if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && (!Left.is(TT_PointerOrReference) || (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right && !Line.IsMultiVariableDeclStmt))) { return true; } if (Left.is(TT_PointerOrReference)) { // Add a space if the next token is a pointer qualifier and the style // requires spaces before pointer qualifiers. 
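// Illustrative example: with SpaceAroundPointerQualifiers set to
// SAPQ_Before, a space is required between the '*' and a following
// qualifier, as in 'char * const p;'.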
if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before || Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) && Right.canBePointerOrReferenceQualifier()) { return true; } // & 1 if (Right.Tok.isLiteral()) return true; // & /* comment if (Right.is(TT_BlockComment)) return true; // foo() -> const Bar * override/final if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) && !Right.is(TT_StartOfName)) { return true; } // & { if (Right.is(tok::l_brace) && Right.is(BK_Block)) return true; // for (auto a = 0, b = 0; const auto& c : {1, 2, 3}) if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next && Right.Next->is(TT_RangeBasedForLoopColon)) { return getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right; } if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, tok::l_paren)) { return false; } if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right) return false; // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone, // because it does not take into account nested scopes like lambdas. // In multi-variable declaration statements, attach */& to the variable // independently of the style. However, avoid doing it if we are in a nested // scope, e.g. lambda. We still need to special-case statements with // initializers. if (Line.IsMultiVariableDeclStmt && (Left.NestingLevel == Line.First->NestingLevel || ((Left.NestingLevel == Line.First->NestingLevel + 1) && startsWithInitStatement(Line)))) { return false; } return Left.Previous && !Left.Previous->isOneOf( tok::l_paren, tok::coloncolon, tok::l_square); } // Ensure right pointer alignment with ellipsis e.g. int *...P if (Left.is(tok::ellipsis) && Left.Previous && Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp)) { return Style.PointerAlignment != FormatStyle::PAS_Right; } if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp)) return false; if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) { const FormatToken *Previous = &Left; while (Previous && !Previous->is(tok::kw_operator)) { if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) { Previous = Previous->getPreviousNonComment(); continue; } if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) { Previous = Previous->MatchingParen->getPreviousNonComment(); continue; } if (Previous->is(tok::coloncolon)) { Previous = Previous->getPreviousNonComment(); continue; } break; } // Space between the type and the * in: // operator void*() // operator char*() // operator void const*() // operator void volatile*() // operator /*comment*/ const char*() // operator volatile /*comment*/ char*() // operator Foo*() // operator C*() // operator std::Foo*() // operator C::D*() // dependent on PointerAlignment style. 
if (Previous) { if (Previous->endsSequence(tok::kw_operator)) return Style.PointerAlignment != FormatStyle::PAS_Left; if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) { return (Style.PointerAlignment != FormatStyle::PAS_Left) || (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After) || (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both); } } } const auto SpaceRequiredForArrayInitializerLSquare = [](const FormatToken &LSquareTok, const FormatStyle &Style) { return Style.SpacesInContainerLiterals || ((Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) && !Style.Cpp11BracedListStyle && LSquareTok.endsSequence(tok::l_square, tok::colon, TT_SelectorName)); }; if (Left.is(tok::l_square)) { return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) && SpaceRequiredForArrayInitializerLSquare(Left, Style)) || (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare, TT_LambdaLSquare) && Style.SpacesInSquareBrackets && Right.isNot(tok::r_square)); } if (Right.is(tok::r_square)) { return Right.MatchingParen && ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) && SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen, Style)) || (Style.SpacesInSquareBrackets && Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare, TT_LambdaLSquare)) || Right.MatchingParen->is(TT_AttributeParen)); } if (Right.is(tok::l_square) && !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, TT_DesignatedInitializerLSquare, TT_StructuredBindingLSquare, TT_AttributeSquare) && !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) && !(!Left.is(tok::r_square) && Style.SpaceBeforeSquareBrackets && Right.is(TT_ArraySubscriptLSquare))) { return false; } if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) return !Left.Children.empty(); // No spaces in "{}". if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) || (Right.is(tok::r_brace) && Right.MatchingParen && Right.MatchingParen->isNot(BK_Block))) { return Style.Cpp11BracedListStyle ? Style.SpacesInParentheses : true; } if (Left.is(TT_BlockComment)) { // No whitespace in x(/*foo=*/1), except for JavaScript. return Style.isJavaScript() || !Left.TokenText.endswith("=*/"); } // Space between template and attribute. // e.g. template [[nodiscard]] ... 
if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare)) return true; // Space before parentheses common for all languages if (Right.is(tok::l_paren)) { if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen)) return spaceRequiredBeforeParens(Right); if (Left.isOneOf(TT_RequiresClause, TT_RequiresClauseInARequiresExpression)) { return Style.SpaceBeforeParensOptions.AfterRequiresInClause || spaceRequiredBeforeParens(Right); } if (Left.is(TT_RequiresExpression)) { return Style.SpaceBeforeParensOptions.AfterRequiresInExpression || spaceRequiredBeforeParens(Right); } if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) || (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) { return true; } if (Left.is(TT_ForEachMacro)) { return Style.SpaceBeforeParensOptions.AfterForeachMacros || spaceRequiredBeforeParens(Right); } if (Left.is(TT_IfMacro)) { return Style.SpaceBeforeParensOptions.AfterIfMacros || spaceRequiredBeforeParens(Right); } if (Line.Type == LT_ObjCDecl) return true; if (Left.is(tok::semi)) return true; if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch, tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) || Left.isIf(Line.Type != LT_PreprocessorDirective)) { return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); } // TODO add Operator overloading specific Options to // SpaceBeforeParensOptions if (Right.is(TT_OverloadedOperatorLParen)) return spaceRequiredBeforeParens(Right); // Function declaration or definition if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) { if (Line.mightBeFunctionDefinition()) { return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName || spaceRequiredBeforeParens(Right); } else { return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName || spaceRequiredBeforeParens(Right); } } // Lambda if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) && Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) { return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName || spaceRequiredBeforeParens(Right); } if (!Left.Previous || Left.Previous->isNot(tok::period)) { if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) { return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); } if (Left.isOneOf(tok::kw_new, tok::kw_delete)) { return ((!Line.MightBeFunctionDecl || !Left.Previous) && Style.SpaceBeforeParens != FormatStyle::SBPO_Never) || spaceRequiredBeforeParens(Right); } if (Left.is(tok::r_square) && Left.MatchingParen && Left.MatchingParen->Previous && Left.MatchingParen->Previous->is(tok::kw_delete)) { return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) || spaceRequiredBeforeParens(Right); } } // Handle builtins like identifiers. 
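// Illustrative example: '__builtin_expect(x, 1)' is spaced the same way as
// an ordinary call such as 'foo(x, 1)'.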
if (Line.Type != LT_PreprocessorDirective && (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) { return spaceRequiredBeforeParens(Right); } return false; } if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) return false; if (Right.is(TT_UnaryOperator)) { return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr)); } if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, tok::r_paren) || Left.isSimpleTypeSpecifier()) && Right.is(tok::l_brace) && Right.getNextNonComment() && Right.isNot(BK_Block)) { return false; } if (Left.is(tok::period) || Right.is(tok::period)) return false; // u#str, U#str, L#str, u8#str // uR#str, UR#str, LR#str, u8R#str if (Right.is(tok::hash) && Left.is(tok::identifier) && (Left.TokenText == "L" || Left.TokenText == "u" || Left.TokenText == "U" || Left.TokenText == "u8" || Left.TokenText == "LR" || Left.TokenText == "uR" || Left.TokenText == "UR" || Left.TokenText == "u8R")) { return false; } if (Left.is(TT_TemplateCloser) && Left.MatchingParen && Left.MatchingParen->Previous && (Left.MatchingParen->Previous->is(tok::period) || Left.MatchingParen->Previous->is(tok::coloncolon))) { // Java call to generic function with explicit type: // A.>>DoSomething(); // A::>>DoSomething(); // With a Java 8 method reference. return false; } if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) return false; if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) { // Objective-C dictionary literal -> no space after opening brace. return false; } if (Right.is(tok::r_brace) && Right.MatchingParen && Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) { // Objective-C dictionary literal -> no space before closing brace. return false; } if (Right.getType() == TT_TrailingAnnotation && Right.isOneOf(tok::amp, tok::ampamp) && Left.isOneOf(tok::kw_const, tok::kw_volatile) && (!Right.Next || Right.Next->is(tok::semi))) { // Match const and volatile ref-qualifiers without any additional // qualifiers such as // void Fn() const &; return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left; } return true; } bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right) const { const FormatToken &Left = *Right.Previous; // If the token is finalized don't touch it (as it could be in a // clang-format-off section). if (Left.Finalized) return Right.hasWhitespaceBefore(); // Never ever merge two words. if (Keywords.isWordLike(Right) && Keywords.isWordLike(Left)) return true; // Leave a space between * and /* to avoid C4138 `comment end` found outside // of comment. if (Left.is(tok::star) && Right.is(tok::comment)) return true; if (Style.isCpp()) { // Space between import . // or import .....; if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis)) return true; // Space between `module :` and `import :`. if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) && Right.is(TT_ModulePartitionColon)) { return true; } // No space between import foo:bar but keep a space between import :bar; if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon)) return false; // No space between :bar; if (Left.is(TT_ModulePartitionColon) && Right.isOneOf(tok::identifier, tok::kw_private)) { return false; } if (Left.is(tok::ellipsis) && Right.is(tok::identifier) && Line.First->is(Keywords.kw_import)) { return false; } // Space in __attribute__((attr)) ::type. 
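  // For illustration (hypothetical declaration):
  //   __attribute__((aligned(8))) ::ns::Type value;
  // i.e. the space between the attribute's closing ')' and "::" is kept.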
if (Left.is(TT_AttributeParen) && Right.is(tok::coloncolon)) return true; if (Left.is(tok::kw_operator)) return Right.is(tok::coloncolon); if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) && !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) { return true; } if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) && Right.is(TT_TemplateOpener)) { return true; } } else if (Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { if (Right.is(tok::period) && Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, Keywords.kw_repeated, Keywords.kw_extend)) { return true; } if (Right.is(tok::l_paren) && Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) { return true; } if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName)) return true; // Slashes occur in text protocol extension syntax: [type/type] { ... }. if (Left.is(tok::slash) || Right.is(tok::slash)) return false; if (Left.MatchingParen && Left.MatchingParen->is(TT_ProtoExtensionLSquare) && Right.isOneOf(tok::l_brace, tok::less)) { return !Style.Cpp11BracedListStyle; } // A percent is probably part of a formatting specification, such as %lld. if (Left.is(tok::percent)) return false; // Preserve the existence of a space before a percent for cases like 0x%04x // and "%d %d" if (Left.is(tok::numeric_constant) && Right.is(tok::percent)) return Right.hasWhitespaceBefore(); } else if (Style.isJson()) { if (Right.is(tok::colon)) return false; } else if (Style.isCSharp()) { // Require spaces around '{' and before '}' unless they appear in // interpolated strings. Interpolated strings are merged into a single token // so cannot have spaces inserted by this function. // No space between 'this' and '[' if (Left.is(tok::kw_this) && Right.is(tok::l_square)) return false; // No space between 'new' and '(' if (Left.is(tok::kw_new) && Right.is(tok::l_paren)) return false; // Space before { (including space within '{ {'). if (Right.is(tok::l_brace)) return true; // Spaces inside braces. if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace)) return true; if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace)) return true; // Spaces around '=>'. if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow)) return true; // No spaces around attribute target colons if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon)) return false; // space between type and variable e.g. Dictionary foo; if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName)) return true; // spaces inside square brackets. if (Left.is(tok::l_square) || Right.is(tok::r_square)) return Style.SpacesInSquareBrackets; // No space before ? in nullable types. if (Right.is(TT_CSharpNullable)) return false; // No space before null forgiving '!'. if (Right.is(TT_NonNullAssertion)) return false; // No space between consecutive commas '[,,]'. if (Left.is(tok::comma) && Right.is(tok::comma)) return false; // space after var in `var (key, value)` if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren)) return true; // space between keywords and paren e.g. 
"using (" if (Right.is(tok::l_paren)) { if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when, Keywords.kw_lock)) { return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); } } // space between method modifier and opening parenthesis of a tuple return // type if (Left.isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected, tok::kw_virtual, tok::kw_extern, tok::kw_static, Keywords.kw_internal, Keywords.kw_abstract, Keywords.kw_sealed, Keywords.kw_override, Keywords.kw_async, Keywords.kw_unsafe) && Right.is(tok::l_paren)) { return true; } } else if (Style.isJavaScript()) { if (Left.is(TT_FatArrow)) return true; // for await ( ... if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous && Left.Previous->is(tok::kw_for)) { return true; } if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && Right.MatchingParen) { const FormatToken *Next = Right.MatchingParen->getNextNonComment(); // An async arrow function, for example: `x = async () => foo();`, // as opposed to calling a function called async: `x = async();` if (Next && Next->is(TT_FatArrow)) return true; } if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) { return false; } // In tagged template literals ("html`bar baz`"), there is no space between // the tag identifier and the template string. if (Keywords.IsJavaScriptIdentifier(Left, /* AcceptIdentifierName= */ false) && Right.is(TT_TemplateString)) { return false; } if (Right.is(tok::star) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) { return false; } if (Right.isOneOf(tok::l_brace, tok::l_square) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield, Keywords.kw_extends, Keywords.kw_implements)) { return true; } if (Right.is(tok::l_paren)) { // JS methods can use some keywords as names (e.g. `delete()`). if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo()) return false; // Valid JS method names can include keywords, e.g. `foo.delete()` or // `bar.instanceof()`. Recognize call positions by preceding period. if (Left.Previous && Left.Previous->is(tok::period) && Left.Tok.getIdentifierInfo()) { return false; } // Additional unary JavaScript operators that need a space after. if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof, tok::kw_void)) { return true; } } // `foo as const;` casts into a const type. if (Left.endsSequence(tok::kw_const, Keywords.kw_as)) return false; if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, tok::kw_const) || // "of" is only a keyword if it appears after another identifier // (e.g. as "const x of y" in a for loop), or after a destructuring // operation (const [x, y] of z, const {a, b} of c). 
(Left.is(Keywords.kw_of) && Left.Previous && (Left.Previous->is(tok::identifier) || Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) && (!Left.Previous || !Left.Previous->is(tok::period))) { return true; } if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && Left.Previous->is(tok::period) && Right.is(tok::l_paren)) { return false; } if (Left.is(Keywords.kw_as) && Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) { return true; } if (Left.is(tok::kw_default) && Left.Previous && Left.Previous->is(tok::kw_export)) { return true; } if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace)) return true; if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion)) return false; if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator)) return false; if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) && Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) { return false; } if (Left.is(tok::ellipsis)) return false; if (Left.is(TT_TemplateCloser) && !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square, Keywords.kw_implements, Keywords.kw_extends)) { // Type assertions ('expr') are not followed by whitespace. Other // locations that should have whitespace following are identified by the // above set of follower tokens. return false; } if (Right.is(TT_NonNullAssertion)) return false; if (Left.is(TT_NonNullAssertion) && Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) { return true; // "x! as string", "x! in y" } } else if (Style.Language == FormatStyle::LK_Java) { if (Left.is(tok::r_square) && Right.is(tok::l_brace)) return true; if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) { return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); } if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected) || Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract, Keywords.kw_native)) && Right.is(TT_TemplateOpener)) { return true; } } else if (Style.isVerilog()) { // Don't add space within a delay like `#0`. if (!Left.is(TT_BinaryOperator) && Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) { return false; } // Add space after a delay. if (!Right.is(tok::semi) && (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) || Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHashHash) || (Left.is(tok::r_paren) && Left.MatchingParen && Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) { return true; } } if (Left.is(TT_ImplicitStringLiteral)) return Right.hasWhitespaceBefore(); if (Line.Type == LT_ObjCMethodDecl) { if (Left.is(TT_ObjCMethodSpecifier)) return true; if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) { // Don't space between ')' and or ')' and 'new'. 'new' is not a // keyword in Objective-C, and '+ (instancetype)new;' is a standard class // method declaration. 
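    // Illustrative declarations (assuming an Objective-C interface):
    //   + (instancetype)new;
    //   - (void)setValue:(int)value;
    // i.e. ')' stays attached to the selector component that follows it.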
return false; } } if (Line.Type == LT_ObjCProperty && (Right.is(tok::equal) || Left.is(tok::equal))) { return false; } if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) || Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) { return true; } if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen)) return true; if (Right.is(tok::comma)) return false; if (Right.is(TT_ObjCBlockLParen)) return true; if (Right.is(TT_CtorInitializerColon)) return Style.SpaceBeforeCtorInitializerColon; if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon) return false; if (Right.is(TT_RangeBasedForLoopColon) && !Style.SpaceBeforeRangeBasedForLoopColon) { return false; } if (Left.is(TT_BitFieldColon)) { return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both || Style.BitFieldColonSpacing == FormatStyle::BFCS_After; } if (Right.is(tok::colon)) { if (Line.First->isOneOf(tok::kw_default, tok::kw_case)) return Style.SpaceBeforeCaseColon; const FormatToken *Next = Right.getNextNonComment(); if (!Next || Next->is(tok::semi)) return false; if (Right.is(TT_ObjCMethodExpr)) return false; if (Left.is(tok::question)) return false; if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) return false; if (Right.is(TT_DictLiteral)) return Style.SpacesInContainerLiterals; if (Right.is(TT_AttributeColon)) return false; if (Right.is(TT_CSharpNamedArgumentColon)) return false; if (Right.is(TT_BitFieldColon)) { return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both || Style.BitFieldColonSpacing == FormatStyle::BFCS_Before; } return true; } // Do not merge "- -" into "--". if ((Left.isOneOf(tok::minus, tok::minusminus) && Right.isOneOf(tok::minus, tok::minusminus)) || (Left.isOneOf(tok::plus, tok::plusplus) && Right.isOneOf(tok::plus, tok::plusplus))) { return true; } if (Left.is(TT_UnaryOperator)) { if (!Right.is(tok::l_paren)) { // The alternative operators for ~ and ! are "compl" and "not". // If they are used instead, we do not want to combine them with // the token to the right, unless that is a left paren. if (Left.is(tok::exclaim) && Left.TokenText == "not") return true; if (Left.is(tok::tilde) && Left.TokenText == "compl") return true; // Lambda captures allow for a lone &, so "&]" needs to be properly // handled. if (Left.is(tok::amp) && Right.is(tok::r_square)) return Style.SpacesInSquareBrackets; } return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) || Right.is(TT_BinaryOperator); } // If the next token is a binary operator or a selector name, we have // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly. 
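  // For illustration of the option checked below:
  //   SpaceAfterCStyleCast: false ->  (double)x
  //   SpaceAfterCStyleCast: true  ->  (double) x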
if (Left.is(TT_CastRParen)) { return Style.SpaceAfterCStyleCast || Right.isOneOf(TT_BinaryOperator, TT_SelectorName); } auto ShouldAddSpacesInAngles = [this, &Right]() { if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always) return true; if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave) return Right.hasWhitespaceBefore(); return false; }; if (Left.is(tok::greater) && Right.is(tok::greater)) { if (Style.Language == FormatStyle::LK_TextProto || (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) { return !Style.Cpp11BracedListStyle; } return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && ((Style.Standard < FormatStyle::LS_Cpp11) || ShouldAddSpacesInAngles()); } if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) || Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) { return false; } if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) && Right.getPrecedence() == prec::Assignment) { return false; } if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) && (Left.is(tok::identifier) || Left.is(tok::kw_this))) { return false; } if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) { // Generally don't remove existing spaces between an identifier and "::". // The identifier might actually be a macro name such as ALWAYS_INLINE. If // this turns out to be too lenient, add analysis of the identifier itself. return Right.hasWhitespaceBefore(); } if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) { // Put a space between < and :: in vector< ::std::string > return (Left.is(TT_TemplateOpener) && ((Style.Standard < FormatStyle::LS_Cpp11) || ShouldAddSpacesInAngles())) || !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, tok::kw___super, TT_TemplateOpener, TT_TemplateCloser)) || (Left.is(tok::l_paren) && Style.SpacesInParentheses); } if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return ShouldAddSpacesInAngles(); // Space before TT_StructuredBindingLSquare. if (Right.is(TT_StructuredBindingLSquare)) { return !Left.isOneOf(tok::amp, tok::ampamp) || getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right; } // Space before & or && following a TT_StructuredBindingLSquare. if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) && Right.isOneOf(tok::amp, tok::ampamp)) { return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left; } if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && !Right.is(tok::r_paren))) { return true; } if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) && Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) { return false; } if (Right.is(tok::less) && Left.isNot(tok::l_paren) && Line.startsWith(tok::hash)) { return true; } if (Right.is(TT_TrailingUnaryOperator)) return false; if (Left.is(TT_RegexLiteral)) return false; return spaceRequiredBetween(Line, Left, Right); } // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style. static bool isAllmanBrace(const FormatToken &Tok) { return Tok.is(tok::l_brace) && Tok.is(BK_Block) && !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral); } // Returns 'true' if 'Tok' is a function argument. 
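// As used below, 'Tok' is a lambda's opening brace; it counts as a function
// argument when its matching brace is immediately followed by ',' or ')',
// e.g. (illustrative) takesCallback([] { doWork(); }).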
static bool IsFunctionArgument(const FormatToken &Tok) { return Tok.MatchingParen && Tok.MatchingParen->Next && Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren); } static bool isItAnEmptyLambdaAllowed(const FormatToken &Tok, FormatStyle::ShortLambdaStyle ShortLambdaOption) { return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None; } static bool isAllmanLambdaBrace(const FormatToken &Tok) { return Tok.is(tok::l_brace) && Tok.is(BK_Block) && !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral); } // Returns the first token on the line that is not a comment. static const FormatToken *getFirstNonComment(const AnnotatedLine &Line) { const FormatToken *Next = Line.First; if (!Next) return Next; if (Next->is(tok::comment)) Next = Next->getNextNonComment(); return Next; } bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) const { const FormatToken &Left = *Right.Previous; if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0) return true; if (Style.isCSharp()) { if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) && Style.BraceWrapping.AfterFunction) { return true; } if (Right.is(TT_CSharpNamedArgumentColon) || Left.is(TT_CSharpNamedArgumentColon)) { return false; } if (Right.is(TT_CSharpGenericTypeConstraint)) return true; if (Right.Next && Right.Next->is(TT_FatArrow) && (Right.is(tok::numeric_constant) || (Right.is(tok::identifier) && Right.TokenText == "_"))) { return true; } // Break after C# [...] and before public/protected/private/internal. if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) && (Right.isAccessSpecifier(/*ColonRequired=*/false) || Right.is(Keywords.kw_internal))) { return true; } // Break between ] and [ but only when there are really 2 attributes. if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) && Left.is(tok::r_square) && Right.is(tok::l_square)) { return true; } } else if (Style.isJavaScript()) { // FIXME: This might apply to other languages and token kinds. if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous && Left.Previous->is(tok::string_literal)) { return true; } if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 && Left.Previous && Left.Previous->is(tok::equal) && Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export, tok::kw_const) && // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match // above. !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) { // Object literals on the top level of a file are treated as "enum-style". // Each key/value pair is put on a separate line, instead of bin-packing. return true; } if (Left.is(tok::l_brace) && Line.Level == 0 && (Line.startsWith(tok::kw_enum) || Line.startsWith(tok::kw_const, tok::kw_enum) || Line.startsWith(tok::kw_export, tok::kw_enum) || Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) { // JavaScript top-level enum key/value pairs are put on separate lines // instead of bin-packing. return true; } if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous && Left.Previous->is(TT_FatArrow)) { // JS arrow function (=> {...}). switch (Style.AllowShortLambdasOnASingleLine) { case FormatStyle::SLS_All: return false; case FormatStyle::SLS_None: return true; case FormatStyle::SLS_Empty: return !Left.Children.empty(); case FormatStyle::SLS_Inline: // allow one-lining inline (e.g. in function call args) and empty arrow // functions. 
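      // Rough illustration (assuming AllowShortLambdasOnASingleLine: Inline):
      //   promise.then(() => { done(); });   // nested: may stay on one line
      //   const f = () => { done(); };       // top level: body is wrapped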
return (Left.NestingLevel == 0 && Line.Level == 0) && !Left.Children.empty(); } llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum"); } if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && !Left.Children.empty()) { // Support AllowShortFunctionsOnASingleLine for JavaScript. return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty || (Left.NestingLevel == 0 && Line.Level == 0 && Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly); } } else if (Style.Language == FormatStyle::LK_Java) { if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && Right.Next->is(tok::string_literal)) { return true; } } else if (Style.Language == FormatStyle::LK_Cpp || Style.Language == FormatStyle::LK_ObjC || Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TableGen || Style.Language == FormatStyle::LK_TextProto) { if (Left.isStringLiteral() && Right.isStringLiteral()) return true; } // Basic JSON newline processing. if (Style.isJson()) { // Always break after a JSON record opener. // { // } if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace)) return true; // Always break after a JSON array opener. // [ // ] if (Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) && !Right.is(tok::r_square)) { return true; } // Always break after successive entries. // 1, // 2 if (Left.is(tok::comma)) return true; } // If the last token before a '}', ']', or ')' is a comma or a trailing // comment, the intention is to insert a line break after it in order to make // shuffling around entries easier. Import statements, especially in // JavaScript, can be an exception to this rule. if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) { const FormatToken *BeforeClosingBrace = nullptr; if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || (Style.isJavaScript() && Left.is(tok::l_paren))) && Left.isNot(BK_Block) && Left.MatchingParen) { BeforeClosingBrace = Left.MatchingParen->Previous; } else if (Right.MatchingParen && (Right.MatchingParen->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || (Style.isJavaScript() && Right.MatchingParen->is(tok::l_paren)))) { BeforeClosingBrace = &Left; } if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || BeforeClosingBrace->isTrailingComment())) { return true; } } if (Right.is(tok::comment)) { return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) && (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); } if (Left.isTrailingComment()) return true; if (Left.IsUnterminatedLiteral) return true; if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) && Right.Next->is(tok::string_literal)) { return true; } if (Right.is(TT_RequiresClause)) { switch (Style.RequiresClausePosition) { case FormatStyle::RCPS_OwnLine: case FormatStyle::RCPS_WithFollowing: return true; default: break; } } // Can break after template<> declaration if (Left.ClosesTemplateDeclaration && Left.MatchingParen && Left.MatchingParen->NestingLevel == 0) { // Put concepts on the next line e.g. // template // concept ... 
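  // Illustrative layout (assuming BreakBeforeConceptDeclarations: Always):
  //   template <typename T>
  //   concept Hashable = requires(T a) { std::hash<T>{}(a); };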
if (Right.is(tok::kw_concept)) return Style.BreakBeforeConceptDeclarations == FormatStyle::BBCDS_Always; return Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes; } if (Left.ClosesRequiresClause && Right.isNot(tok::semi)) { switch (Style.RequiresClausePosition) { case FormatStyle::RCPS_OwnLine: case FormatStyle::RCPS_WithPreceding: return true; default: break; } } if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) { if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon && (Left.is(TT_CtorInitializerComma) || Right.is(TT_CtorInitializerColon))) { return true; } if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon && Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) { return true; } } if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine && Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) { return true; } // Break only if we have multiple inheritance. if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma && Right.is(TT_InheritanceComma)) { return true; } if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma && Left.is(TT_InheritanceComma)) { return true; } if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) { // Multiline raw string literals are special wrt. line breaks. The author // has made a deliberate choice and might have aligned the contents of the // string literal accordingly. Thus, we try to keep existing line breaks. return Right.IsMultiline && Right.NewlinesBefore > 0; } if ((Left.is(tok::l_brace) || (Left.is(tok::less) && Left.Previous && Left.Previous->is(tok::equal))) && Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) { // Don't put enums or option definitions onto single lines in protocol // buffers. return true; } if (Right.is(TT_InlineASMBrace)) return Right.HasUnescapedNewline; if (isAllmanBrace(Left) || isAllmanBrace(Right)) { auto FirstNonComment = getFirstNonComment(Line); bool AccessSpecifier = FirstNonComment && FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public, tok::kw_private, tok::kw_protected); if (Style.BraceWrapping.AfterEnum) { if (Line.startsWith(tok::kw_enum) || Line.startsWith(tok::kw_typedef, tok::kw_enum)) { return true; } // Ensure BraceWrapping for `public enum A {`. if (AccessSpecifier && FirstNonComment->Next && FirstNonComment->Next->is(tok::kw_enum)) { return true; } } // Ensure BraceWrapping for `public interface A {`. if (Style.BraceWrapping.AfterClass && ((AccessSpecifier && FirstNonComment->Next && FirstNonComment->Next->is(Keywords.kw_interface)) || Line.startsWith(Keywords.kw_interface))) { return true; } return (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) || (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct); } if (Left.is(TT_ObjCBlockLBrace) && Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) { return true; } // Ensure wrapping after __attribute__((XX)) and @interface etc.
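  // Illustrative Objective-C input (hypothetical attribute):
  //   __attribute__((objc_subclassing_restricted))
  //   @interface Foo : NSObject
  // i.e. the @interface line is wrapped after the attribute.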
if (Left.is(TT_AttributeParen) && Right.is(TT_ObjCDecl)) return true; if (Left.is(TT_LambdaLBrace)) { if (IsFunctionArgument(Left) && Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) { return false; } if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None || Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline || (!Left.Children.empty() && Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) { return true; } } if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) && Left.isOneOf(tok::star, tok::amp, tok::ampamp, TT_TemplateCloser)) { return true; } // Put multiple Java annotation on a new line. if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && Left.is(TT_LeadingJavaAnnotation) && Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) { return true; } if (Right.is(TT_ProtoExtensionLSquare)) return true; // In text proto instances if a submessage contains at least 2 entries and at // least one of them is a submessage, like A { ... B { ... } ... }, // put all of the entries of A on separate lines by forcing the selector of // the submessage B to be put on a newline. // // Example: these can stay on one line: // a { scalar_1: 1 scalar_2: 2 } // a { b { key: value } } // // and these entries need to be on a new line even if putting them all in one // line is under the column limit: // a { // scalar: 1 // b { key: value } // } // // We enforce this by breaking before a submessage field that has previous // siblings, *and* breaking before a field that follows a submessage field. // // Be careful to exclude the case [proto.ext] { ... } since the `]` is // the TT_SelectorName there, but we don't want to break inside the brackets. // // Another edge case is @submessage { key: value }, which is a common // substitution placeholder. In this case we want to keep `@` and `submessage` // together. // // We ensure elsewhere that extensions are always on their own line. if ((Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) && Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) { // Keep `@submessage` together in: // @submessage { key: value } if (Left.is(tok::at)) return false; // Look for the scope opener after selector in cases like: // selector { ... // selector: { ... // selector: @base { ... FormatToken *LBrace = Right.Next; if (LBrace && LBrace->is(tok::colon)) { LBrace = LBrace->Next; if (LBrace && LBrace->is(tok::at)) { LBrace = LBrace->Next; if (LBrace) LBrace = LBrace->Next; } } if (LBrace && // The scope opener is one of {, [, <: // selector { ... } // selector [ ... ] // selector < ... > // // In case of selector { ... }, the l_brace is TT_DictLiteral. // In case of an empty selector {}, the l_brace is not TT_DictLiteral, // so we check for immediately following r_brace. ((LBrace->is(tok::l_brace) && (LBrace->is(TT_DictLiteral) || (LBrace->Next && LBrace->Next->is(tok::r_brace)))) || LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) { // If Left.ParameterCount is 0, then this submessage entry is not the // first in its parent submessage, and we want to break before this entry. // If Left.ParameterCount is greater than 0, then its parent submessage // might contain 1 or more entries and we want to break before this entry // if it contains at least 2 entries. 
We deal with this case later by // detecting and breaking before the next entry in the parent submessage. if (Left.ParameterCount == 0) return true; // However, if this submessage is the first entry in its parent // submessage, Left.ParameterCount might be 1 in some cases. // We deal with this case later by detecting an entry // following a closing paren of this submessage. } // If this is an entry immediately following a submessage, it will be // preceded by a closing paren of that submessage, like in: // left---. .---right // v v // sub: { ... } key: value // If there was a comment between `}` an `key` above, then `key` would be // put on a new line anyways. if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square)) return true; } // Deal with lambda arguments in C++ - we want consistent line breaks whether // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced // as aggressive line breaks are placed when the lambda is not the last arg. if ((Style.Language == FormatStyle::LK_Cpp || Style.Language == FormatStyle::LK_ObjC) && Left.is(tok::l_paren) && Left.BlockParameterCount > 0 && !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) { // Multiple lambdas in the same function call force line breaks. if (Left.BlockParameterCount > 1) return true; // A lambda followed by another arg forces a line break. if (!Left.Role) return false; auto Comma = Left.Role->lastComma(); if (!Comma) return false; auto Next = Comma->getNextNonComment(); if (!Next) return false; if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret)) return true; } return false; } bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) const { const FormatToken &Left = *Right.Previous; // Language-specific stuff. if (Style.isCSharp()) { if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) || Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) { return false; } // Only break after commas for generic type constraints. if (Line.First->is(TT_CSharpGenericTypeConstraint)) return Left.is(TT_CSharpGenericTypeConstraintComma); // Keep nullable operators attached to their identifiers. if (Right.is(TT_CSharpNullable)) return false; } else if (Style.Language == FormatStyle::LK_Java) { if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, Keywords.kw_implements)) { return false; } if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, Keywords.kw_implements)) { return true; } } else if (Style.isJavaScript()) { const FormatToken *NonComment = Right.getPreviousNonComment(); if (NonComment && NonComment->isOneOf( tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break, tok::kw_throw, Keywords.kw_interface, Keywords.kw_type, tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected, Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set, Keywords.kw_async, Keywords.kw_await)) { return false; // Otherwise automatic semicolon insertion would trigger. } if (Right.NestingLevel == 0 && (Left.Tok.getIdentifierInfo() || Left.isOneOf(tok::r_square, tok::r_paren)) && Right.isOneOf(tok::l_square, tok::l_paren)) { return false; // Otherwise automatic semicolon insertion would trigger. } if (NonComment && NonComment->is(tok::identifier) && NonComment->TokenText == "asserts") { return false; } if (Left.is(TT_FatArrow) && Right.is(tok::l_brace)) return false; if (Left.is(TT_JsTypeColon)) return true; // Don't wrap between ":" and "!" of a strict prop init ("field!: type;"). 
if (Left.is(tok::exclaim) && Right.is(tok::colon)) return false; // Look for is type annotations like: // function f(): a is B { ... } // Do not break before is in these cases. if (Right.is(Keywords.kw_is)) { const FormatToken *Next = Right.getNextNonComment(); // If `is` is followed by a colon, it's likely that it's a dict key, so // ignore it for this check. // For example this is common in Polymer: // Polymer({ // is: 'name', // ... // }); if (!Next || !Next->is(tok::colon)) return false; } if (Left.is(Keywords.kw_in)) return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None; if (Right.is(Keywords.kw_in)) return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; if (Right.is(Keywords.kw_as)) return false; // must not break before as in 'x as type' casts if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) { // extends and infer can appear as keywords in conditional types: // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types // do not break before them, as the expressions are subject to ASI. return false; } if (Left.is(Keywords.kw_as)) return true; if (Left.is(TT_NonNullAssertion)) return true; if (Left.is(Keywords.kw_declare) && Right.isOneOf(Keywords.kw_module, tok::kw_namespace, Keywords.kw_function, tok::kw_class, tok::kw_enum, Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var, Keywords.kw_let, tok::kw_const)) { // See grammar for 'declare' statements at: // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10 return false; } if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && Right.isOneOf(tok::identifier, tok::string_literal)) { return false; // must not break in "module foo { ...}" } if (Right.is(TT_TemplateString) && Right.closesScope()) return false; // Don't split tagged template literal so there is a break between the tag // identifier and template string. if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) return false; if (Left.is(TT_TemplateString) && Left.opensScope()) return true; } if (Left.is(tok::at)) return false; if (Left.Tok.getObjCKeywordID() == tok::objc_interface) return false; if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) return !Right.is(tok::l_paren); if (Right.is(TT_PointerOrReference)) { return Line.IsMultiVariableDeclStmt || (getTokenPointerOrReferenceAlignment(Right) == FormatStyle::PAS_Right && (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName))); } if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || Right.is(tok::kw_operator)) { return true; } if (Left.is(TT_PointerOrReference)) return false; if (Right.isTrailingComment()) { // We rely on MustBreakBefore being set correctly here as we should not // change the "binding" behavior of a comment. // The first comment in a braced lists is always interpreted as belonging to // the first list element. Otherwise, it should be placed outside of the // list. 
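    // For illustration (layout is approximate):
    //   std::vector<int> v = {// comment bound to the first element
    //                         1, 2, 3};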
return Left.is(BK_BracedInit) || (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 && Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon); } if (Left.is(tok::question) && Right.is(tok::colon)) return false; if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) return Style.BreakBeforeTernaryOperators; if (Left.is(TT_ConditionalExpr) || Left.is(tok::question)) return !Style.BreakBeforeTernaryOperators; if (Left.is(TT_InheritanceColon)) return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon; if (Right.is(TT_InheritanceColon)) return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon; if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) && Left.isNot(TT_SelectorName)) { return true; } if (Right.is(tok::colon) && !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) { return false; } if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { if (Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral()) return false; // Prevent cases like: // // submessage: // { key: valueeeeeeeeeeee } // // when the snippet does not fit into one line. // Prefer: // // submessage: { // key: valueeeeeeeeeeee // } // // instead, even if it is longer by one line. // // Note that this allows the "{" to go over the column limit // when the column limit is just between ":" and "{", but that does // not happen too often and alternative formattings in this case are // not much better. // // The code covers the cases: // // submessage: { ... } // submessage: < ... > // repeated: [ ... ] if (((Right.is(tok::l_brace) || Right.is(tok::less)) && Right.is(TT_DictLiteral)) || Right.is(TT_ArrayInitializerLSquare)) { return false; } } return true; } if (Right.is(tok::r_square) && Right.MatchingParen && Right.MatchingParen->is(TT_ProtoExtensionLSquare)) { return false; } if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_ObjCMethodExpr))) { return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
} if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) return true; if (Right.is(tok::kw_concept)) return Style.BreakBeforeConceptDeclarations != FormatStyle::BBCDS_Never; if (Right.is(TT_RequiresClause)) return true; if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen)) return true; if (Left.ClosesRequiresClause) return true; if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen, TT_OverloadedOperator)) { return false; } if (Left.is(TT_RangeBasedForLoopColon)) return true; if (Right.is(TT_RangeBasedForLoopColon)) return false; if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener)) return true; if ((Left.is(tok::greater) && Right.is(tok::greater)) || (Left.is(tok::less) && Right.is(tok::less))) { return false; } if (Right.is(TT_BinaryOperator) && Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || Right.getPrecedence() != prec::Assignment)) { return true; } if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || Left.is(tok::kw_operator)) { return false; } if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) { return false; } if (Left.is(tok::equal) && Right.is(tok::l_brace) && !Style.Cpp11BracedListStyle) { return false; } if (Left.is(tok::l_paren) && Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen)) { return false; } if (Left.is(tok::l_paren) && Left.Previous && (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) { return false; } if (Right.is(TT_ImplicitStringLiteral)) return false; if (Right.is(TT_TemplateCloser)) return false; if (Right.is(tok::r_square) && Right.MatchingParen && Right.MatchingParen->is(TT_LambdaLSquare)) { return false; } // We only break before r_brace if there was a corresponding break before // the l_brace, which is tracked by BreakBeforeClosingBrace. if (Right.is(tok::r_brace)) return Right.MatchingParen && Right.MatchingParen->is(BK_Block); // We only break before r_paren if we're in a block indented context. if (Right.is(tok::r_paren)) { if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent || !Right.MatchingParen) { return false; } const FormatToken *Previous = Right.MatchingParen->Previous; return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf())); } // Allow breaking after a trailing annotation, e.g. after a method // declaration. 
if (Left.is(TT_TrailingAnnotation)) { return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, tok::less, tok::coloncolon); } if (Right.is(tok::kw___attribute) || (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) { return !Left.is(TT_AttributeSquare); } if (Left.is(tok::identifier) && Right.is(tok::string_literal)) return true; if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) return true; if (Left.is(TT_CtorInitializerColon)) { return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon && (!Right.isTrailingComment() || Right.NewlinesBefore > 0); } if (Right.is(TT_CtorInitializerColon)) return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon; if (Left.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) { return false; } if (Right.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) { return true; } if (Left.is(TT_InheritanceComma) && Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) { return false; } if (Right.is(TT_InheritanceComma) && Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) { return true; } if (Left.is(TT_ArrayInitializerLSquare)) return true; if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) return true; if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) && !Left.isOneOf(tok::arrowstar, tok::lessless) && Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || Left.getPrecedence() == prec::Assignment)) { return true; } if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) || (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) { return false; } auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine; if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) { if (isAllmanLambdaBrace(Left)) return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption); if (isAllmanLambdaBrace(Right)) return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption); } return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, tok::kw_class, tok::kw_struct, tok::comment) || Right.isMemberAccess() || Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, tok::colon, tok::l_square, tok::at) || (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_const)) || (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser)); } void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) const { llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n"; const FormatToken *Tok = Line.First; while (Tok) { llvm::errs() << " M=" << Tok->MustBreakBefore << " C=" << Tok->CanBreakBefore << " T=" << getTokenTypeName(Tok->getType()) << " S=" << Tok->SpacesRequiredBefore << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength << " PPK=" << Tok->getPackingKind() << " FakeLParens="; for (prec::Level LParen : Tok->FakeLParens) llvm::errs() << LParen << "/"; llvm::errs() << " FakeRParens=" << Tok->FakeRParens; llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo(); llvm::errs() << " Text='" << Tok->TokenText << "'\n"; if (!Tok->Next) assert(Tok == Line.Last); Tok = Tok->Next; } llvm::errs() << "----\n"; } FormatStyle::PointerAlignmentStyle TokenAnnotator::getTokenReferenceAlignment(const 
FormatToken &Reference) const { assert(Reference.isOneOf(tok::amp, tok::ampamp)); switch (Style.ReferenceAlignment) { case FormatStyle::RAS_Pointer: return Style.PointerAlignment; case FormatStyle::RAS_Left: return FormatStyle::PAS_Left; case FormatStyle::RAS_Right: return FormatStyle::PAS_Right; case FormatStyle::RAS_Middle: return FormatStyle::PAS_Middle; } assert(0); //"Unhandled value of ReferenceAlignment" return Style.PointerAlignment; } FormatStyle::PointerAlignmentStyle TokenAnnotator::getTokenPointerOrReferenceAlignment( const FormatToken &PointerOrReference) const { if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) { switch (Style.ReferenceAlignment) { case FormatStyle::RAS_Pointer: return Style.PointerAlignment; case FormatStyle::RAS_Left: return FormatStyle::PAS_Left; case FormatStyle::RAS_Right: return FormatStyle::PAS_Right; case FormatStyle::RAS_Middle: return FormatStyle::PAS_Middle; } } assert(PointerOrReference.is(tok::star)); return Style.PointerAlignment; } } // namespace format } // namespace clang diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 0e24237faae5..83081bbf0aa0 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1,20810 +1,20815 @@ //===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements semantic analysis for expressions. // //===----------------------------------------------------------------------===// #include "TreeTransform.h" #include "UsedDeclVisitor.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/OperationKinds.h" #include "clang/AST/ParentMapContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/DelayedDiagnostic.h" #include "clang/Sema/Designator.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Overload.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaFixItUtils.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/TypeSize.h" using namespace clang; using namespace sema; /// Determine whether the use of this declaration is valid, without /// emitting diagnostics. 
bool Sema::CanUseDecl(NamedDecl *D, bool TreatUnavailableAsInvalid) { // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) return false; // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) return false; // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, SourceLocation(), /*Diagnose*/ false)) return false; // See if this is an aligned allocation/deallocation function that is // unavailable. if (TreatUnavailableAsInvalid && isUnavailableAlignedAllocationFunction(*FD)) return false; } // See if this function is unavailable. if (TreatUnavailableAsInvalid && D->getAvailability() == AR_Unavailable && cast(CurContext)->getAvailability() != AR_Unavailable) return false; if (isa(D)) return false; return true; } static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) { // Warn if this is used but marked unused. if (const auto *A = D->getAttr()) { // [[maybe_unused]] should not diagnose uses, but __attribute__((unused)) // should diagnose them. if (A->getSemanticSpelling() != UnusedAttr::CXX11_maybe_unused && A->getSemanticSpelling() != UnusedAttr::C2x_maybe_unused) { const Decl *DC = cast_or_null(S.getCurObjCLexicalContext()); if (DC && !DC->hasAttr()) S.Diag(Loc, diag::warn_used_but_marked_unused) << D; } } } /// Emit a note explaining that this function is deleted. void Sema::NoteDeletedFunction(FunctionDecl *Decl) { assert(Decl && Decl->isDeleted()); if (Decl->isDefaulted()) { // If the method was explicitly defaulted, point at that declaration. if (!Decl->isImplicit()) Diag(Decl->getLocation(), diag::note_implicitly_deleted); // Try to diagnose why this special member function was implicitly // deleted. This might fail, if that reason no longer applies. DiagnoseDeletedDefaultedFunction(Decl); return; } auto *Ctor = dyn_cast(Decl); if (Ctor && Ctor->isInheritingConstructor()) return NoteDeletedInheritingConstructor(Ctor); Diag(Decl->getLocation(), diag::note_availability_specified_here) << Decl << 1; } /// Determine whether a FunctionDecl was ever declared with an /// explicit storage class. static bool hasAnyExplicitStorageClass(const FunctionDecl *D) { for (auto I : D->redecls()) { if (I->getStorageClass() != SC_None) return true; } return false; } /// Check whether we're in an extern inline function and referring to a /// variable or function with internal linkage (C11 6.7.4p3). /// /// This is only a warning because we used to silently accept this code, but /// in many cases it will not behave correctly. This is not enabled in C++ mode /// because the restriction language is a bit weaker (C++11 [basic.def.odr]p6) /// and so while there may still be user mistakes, most of the time we can't /// prove that there are errors. static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S, const NamedDecl *D, SourceLocation Loc) { // This is disabled under C++; there are too many ways for this to fire in // contexts where the warning is a false positive, or where it is technically // correct but benign. if (S.getLangOpts().CPlusPlus) return; // Check if this is an inlined function or method. FunctionDecl *Current = S.getCurFunctionDecl(); if (!Current) return; if (!Current->isInlined()) return; if (!Current->isExternallyVisible()) return; // Check if the decl has internal linkage. 
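  // Illustrative C code that reaches the warning below (names are made up):
  //   static int helper(void) { return 0; }
  //   inline int api(void) { return helper(); } // inline with external
  //                                              // linkage uses 'helper'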
if (D->getFormalLinkage() != InternalLinkage) return; // Downgrade from ExtWarn to Extension if // (1) the supposedly external inline function is in the main file, // and probably won't be included anywhere else. // (2) the thing we're referencing is a pure function. // (3) the thing we're referencing is another inline function. // This last can give us false negatives, but it's better than warning on // wrappers for simple C library functions. const FunctionDecl *UsedFn = dyn_cast(D); bool DowngradeWarning = S.getSourceManager().isInMainFile(Loc); if (!DowngradeWarning && UsedFn) DowngradeWarning = UsedFn->isInlined() || UsedFn->hasAttr(); S.Diag(Loc, DowngradeWarning ? diag::ext_internal_in_extern_inline_quiet : diag::ext_internal_in_extern_inline) << /*IsVar=*/!UsedFn << D; S.MaybeSuggestAddingStaticToDecl(Current); S.Diag(D->getCanonicalDecl()->getLocation(), diag::note_entity_declared_at) << D; } void Sema::MaybeSuggestAddingStaticToDecl(const FunctionDecl *Cur) { const FunctionDecl *First = Cur->getFirstDecl(); // Suggest "static" on the function, if possible. if (!hasAnyExplicitStorageClass(First)) { SourceLocation DeclBegin = First->getSourceRange().getBegin(); Diag(DeclBegin, diag::note_convert_inline_to_static) << Cur << FixItHint::CreateInsertion(DeclBegin, "static "); } } /// Determine whether the use of this declaration is valid, and /// emit any corresponding diagnostics. /// /// This routine diagnoses various problems with referencing /// declarations that can occur when using a declaration. For example, /// it might warn if a deprecated or unavailable declaration is being /// used, or produce an error (and return true) if a C++0x deleted /// function is being used. /// /// \returns true if there was an error (this declaration cannot be /// referenced), false otherwise. /// bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, const ObjCInterfaceDecl *UnknownObjCClass, bool ObjCPropertyAccess, bool AvoidPartialAvailabilityChecks, ObjCInterfaceDecl *ClassReceiver) { SourceLocation Loc = Locs.front(); if (getLangOpts().CPlusPlus && isa(D)) { // If there were any diagnostics suppressed by template argument deduction, // emit them now. auto Pos = SuppressedDiagnostics.find(D->getCanonicalDecl()); if (Pos != SuppressedDiagnostics.end()) { for (const PartialDiagnosticAt &Suppressed : Pos->second) Diag(Suppressed.first, Suppressed.second); // Clear out the list of suppressed diagnostics, so that we don't emit // them again for this specialization. However, we don't obsolete this // entry from the table, because we want to avoid ever emitting these // diagnostics again. Pos->second.clear(); } // C++ [basic.start.main]p3: // The function 'main' shall not be used within a program. if (cast(D)->isMain()) Diag(Loc, diag::ext_main_used); diagnoseUnavailableAlignedAllocation(*cast(D), Loc); } // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) { if (isa(D)) { Diag(Loc, diag::err_binding_cannot_appear_in_own_initializer) << D->getDeclName(); } else { Diag(Loc, diag::err_auto_variable_cannot_appear_in_own_initializer) << D->getDeclName() << cast(D)->getType(); } return true; } if (FunctionDecl *FD = dyn_cast(D)) { // See if this is a deleted function. 
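    // Illustrative use that ends up here:
    //   void f() = delete;
    //   void g() { f(); }   // error: use of deleted function, plus a note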
if (FD->isDeleted()) { auto *Ctor = dyn_cast(FD); if (Ctor && Ctor->isInheritingConstructor()) Diag(Loc, diag::err_deleted_inherited_ctor_use) << Ctor->getParent() << Ctor->getInheritedConstructor().getConstructor()->getParent(); else Diag(Loc, diag::err_deleted_function_use); NoteDeletedFunction(FD); return true; } // [expr.prim.id]p4 // A program that refers explicitly or implicitly to a function with a // trailing requires-clause whose constraint-expression is not satisfied, // other than to declare it, is ill-formed. [...] // // See if this is a function with constraints that need to be satisfied. // Check this before deducing the return type, as it might instantiate the // definition. if (FD->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; if (CheckFunctionConstraints(FD, Satisfaction, Loc)) // A diagnostic will have already been generated (non-constant // constraint expression, for example) return true; if (!Satisfaction.IsSatisfied) { Diag(Loc, diag::err_reference_to_function_with_unsatisfied_constraints) << D; DiagnoseUnsatisfiedConstraint(Satisfaction); return true; } } // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, Loc)) return true; if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD)) return true; if (getLangOpts().SYCLIsDevice && !checkSYCLDeviceFunction(Loc, FD)) return true; } if (auto *MD = dyn_cast(D)) { // Lambdas are only default-constructible or assignable in C++2a onwards. if (MD->getParent()->isLambda() && ((isa(MD) && cast(MD)->isDefaultConstructor()) || MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator())) { Diag(Loc, diag::warn_cxx17_compat_lambda_def_ctor_assign) << !isa(MD); } } auto getReferencedObjCProp = [](const NamedDecl *D) -> const ObjCPropertyDecl * { if (const auto *MD = dyn_cast(D)) return MD->findPropertyDecl(); return nullptr; }; if (const ObjCPropertyDecl *ObjCPDecl = getReferencedObjCProp(D)) { if (diagnoseArgIndependentDiagnoseIfAttrs(ObjCPDecl, Loc)) return true; } else if (diagnoseArgIndependentDiagnoseIfAttrs(D, Loc)) { return true; } // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions // Only the variables omp_in and omp_out are allowed in the combiner. // Only the variables omp_priv and omp_orig are allowed in the // initializer-clause. auto *DRD = dyn_cast(CurContext); if (LangOpts.OpenMP && DRD && !CurContext->containsDecl(D) && isa(D)) { Diag(Loc, diag::err_omp_wrong_var_in_declare_reduction) << getCurFunction()->HasOMPDeclareReductionCombiner; Diag(D->getLocation(), diag::note_entity_declared_at) << D; return true; } // [OpenMP 5.0], 2.19.7.3. 
declare mapper Directive, Restrictions // List-items in map clauses on this construct may only refer to the declared // variable var and entities that could be referenced by a procedure defined // at the same location if (LangOpts.OpenMP && isa(D) && !isOpenMPDeclareMapperVarDeclAllowed(cast(D))) { Diag(Loc, diag::err_omp_declare_mapper_wrong_var) << getOpenMPDeclareMapperVarName(); Diag(D->getLocation(), diag::note_entity_declared_at) << D; return true; } if (const auto *EmptyD = dyn_cast(D)) { Diag(Loc, diag::err_use_of_empty_using_if_exists); Diag(EmptyD->getLocation(), diag::note_empty_using_if_exists_here); return true; } DiagnoseAvailabilityOfDecl(D, Locs, UnknownObjCClass, ObjCPropertyAccess, AvoidPartialAvailabilityChecks, ClassReceiver); DiagnoseUnusedOfDecl(*this, D, Loc); diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc); if (auto *VD = dyn_cast(D)) checkTypeSupport(VD->getType(), Loc, VD); if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) { if (!Context.getTargetInfo().isTLSSupported()) if (const auto *VD = dyn_cast(D)) if (VD->getTLSKind() != VarDecl::TLS_None) targetDiag(*Locs.begin(), diag::err_thread_unsupported); } if (isa(D) && isa(D->getDeclContext()) && !isUnevaluatedContext()) { // C++ [expr.prim.req.nested] p3 // A local parameter shall only appear as an unevaluated operand // (Clause 8) within the constraint-expression. Diag(Loc, diag::err_requires_expr_parameter_referenced_in_evaluated_context) << D; Diag(D->getLocation(), diag::note_entity_declared_at) << D; return true; } return false; } /// DiagnoseSentinelCalls - This routine checks whether a call or /// message-send is to a declaration with the sentinel attribute, and /// if so, it checks that the requirements of the sentinel are /// satisfied. void Sema::DiagnoseSentinelCalls(NamedDecl *D, SourceLocation Loc, ArrayRef Args) { const SentinelAttr *attr = D->getAttr(); if (!attr) return; // The number of formal parameters of the declaration. unsigned numFormalParams; // The kind of declaration. This is also an index into a %select in // the diagnostic. enum CalleeType { CT_Function, CT_Method, CT_Block } calleeType; if (ObjCMethodDecl *MD = dyn_cast(D)) { numFormalParams = MD->param_size(); calleeType = CT_Method; } else if (FunctionDecl *FD = dyn_cast(D)) { numFormalParams = FD->param_size(); calleeType = CT_Function; } else if (isa(D)) { QualType type = cast(D)->getType(); const FunctionType *fn = nullptr; if (const PointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->getAs(); if (!fn) return; calleeType = CT_Function; } else if (const BlockPointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->castAs(); calleeType = CT_Block; } else { return; } if (const FunctionProtoType *proto = dyn_cast(fn)) { numFormalParams = proto->getNumParams(); } else { numFormalParams = 0; } } else { return; } // "nullPos" is the number of formal parameters at the end which // effectively count as part of the variadic arguments. This is // useful if you would prefer to not have *any* formal parameters, // but the language forces you to have at least one. unsigned nullPos = attr->getNullPos(); assert((nullPos == 0 || nullPos == 1) && "invalid null position on sentinel"); numFormalParams = (nullPos > numFormalParams ? 0 : numFormalParams - nullPos); // The number of arguments which should follow the sentinel. 
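// [Illustrative aside, not part of the original source] A sketch of the call
// pattern DiagnoseSentinelCalls is checking, with a hypothetical declaration:
//
//   void exec_args(const char *prog, ...) __attribute__((sentinel));
//
//   exec_args("ls", "-l");            // warning: missing sentinel in function call
//   exec_args("ls", "-l", (void *)0); // OK: trailing null sentinel is present
//
// The fix-it below suggests inserting ", nil", ", nullptr", or ", NULL"
// depending on the language mode and which macros are defined.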
unsigned numArgsAfterSentinel = attr->getSentinel(); // If there aren't enough arguments for all the formal parameters, // the sentinel, and the args after the sentinel, complain. if (Args.size() < numFormalParams + numArgsAfterSentinel + 1) { Diag(Loc, diag::warn_not_enough_argument) << D->getDeclName(); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); return; } // Otherwise, find the sentinel expression. Expr *sentinelExpr = Args[Args.size() - numArgsAfterSentinel - 1]; if (!sentinelExpr) return; if (sentinelExpr->isValueDependent()) return; if (Context.isSentinelNullExpr(sentinelExpr)) return; // Pick a reasonable string to insert. Optimistically use 'nil', 'nullptr', // or 'NULL' if those are actually defined in the context. Only use // 'nil' for ObjC methods, where it's much more likely that the // variadic arguments form a list of object pointers. SourceLocation MissingNilLoc = getLocForEndOfToken(sentinelExpr->getEndLoc()); std::string NullValue; if (calleeType == CT_Method && PP.isMacroDefined("nil")) NullValue = "nil"; else if (getLangOpts().CPlusPlus11) NullValue = "nullptr"; else if (PP.isMacroDefined("NULL")) NullValue = "NULL"; else NullValue = "(void*) 0"; if (MissingNilLoc.isInvalid()) Diag(Loc, diag::warn_missing_sentinel) << int(calleeType); else Diag(MissingNilLoc, diag::warn_missing_sentinel) << int(calleeType) << FixItHint::CreateInsertion(MissingNilLoc, ", " + NullValue); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); } SourceRange Sema::getExprRange(Expr *E) const { return E ? E->getSourceRange() : SourceRange(); } //===----------------------------------------------------------------------===// // Standard Promotions and Conversions //===----------------------------------------------------------------------===// /// DefaultFunctionArrayConversion (C99 6.3.2.1p3, C99 6.3.2.1p4). ExprResult Sema::DefaultFunctionArrayConversion(Expr *E, bool Diagnose) { // Handle any placeholder expressions which made it here. if (E->hasPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultFunctionArrayConversion - missing type"); if (Ty->isFunctionType()) { if (auto *DRE = dyn_cast(E->IgnoreParenCasts())) if (auto *FD = dyn_cast(DRE->getDecl())) if (!checkAddressOfFunctionIsAvailable(FD, Diagnose, E->getExprLoc())) return ExprError(); E = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); } else if (Ty->isArrayType()) { // In C90 mode, arrays only promote to pointers if the array expression is // an lvalue. The relevant legalese is C90 6.2.2.1p3: "an lvalue that has // type 'array of type' is converted to an expression that has type 'pointer // to type'...". In C99 this was changed to: C99 6.3.2.1p3: "an expression // that has type 'array of type' ...". The relevant change is "an lvalue" // (C90) to "an expression" (C99). // // C++ 4.2p1: // An lvalue or rvalue of type "array of N T" or "array of unknown bound of // T" can be converted to an rvalue of type "pointer to T". // if (getLangOpts().C99 || getLangOpts().CPlusPlus || E->isLValue()) { ExprResult Res = ImpCastExprToType(E, Context.getArrayDecayedType(Ty), CK_ArrayToPointerDecay); if (Res.isInvalid()) return ExprError(); E = Res.get(); } } return E; } static void CheckForNullPointerDereference(Sema &S, Expr *E) { // Check to see if we are dereferencing a null pointer. 
If so, // and if not volatile-qualified, this is undefined behavior that the // optimizer will delete, so warn about it. People sometimes try to use this // to get a deterministic trap and are surprised by clang's behavior. This // only handles the pattern "*null", which is a very syntactic check. const auto *UO = dyn_cast(E->IgnoreParenCasts()); if (UO && UO->getOpcode() == UO_Deref && UO->getSubExpr()->getType()->isPointerType()) { const LangAS AS = UO->getSubExpr()->getType()->getPointeeType().getAddressSpace(); if ((!isTargetAddressSpace(AS) || (isTargetAddressSpace(AS) && toTargetAddressSpace(AS) == 0)) && UO->getSubExpr()->IgnoreParenCasts()->isNullPointerConstant( S.Context, Expr::NPC_ValueDependentIsNotNull) && !UO->getType().isVolatileQualified()) { S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO, S.PDiag(diag::warn_indirection_through_null) << UO->getSubExpr()->getSourceRange()); S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO, S.PDiag(diag::note_indirection_through_null)); } } } static void DiagnoseDirectIsaAccess(Sema &S, const ObjCIvarRefExpr *OIRE, SourceLocation AssignLoc, const Expr* RHS) { const ObjCIvarDecl *IV = OIRE->getDecl(); if (!IV) return; DeclarationName MemberName = IV->getDeclName(); IdentifierInfo *Member = MemberName.getAsIdentifierInfo(); if (!Member || !Member->isStr("isa")) return; const Expr *Base = OIRE->getBase(); QualType BaseType = Base->getType(); if (OIRE->isArrow()) BaseType = BaseType->getPointeeType(); if (const ObjCObjectType *OTy = BaseType->getAs()) if (ObjCInterfaceDecl *IDecl = OTy->getInterface()) { ObjCInterfaceDecl *ClassDeclared = nullptr; ObjCIvarDecl *IV = IDecl->lookupInstanceVariable(Member, ClassDeclared); if (!ClassDeclared->getSuperClass() && (*ClassDeclared->ivar_begin()) == IV) { if (RHS) { NamedDecl *ObjectSetClass = S.LookupSingleName(S.TUScope, &S.Context.Idents.get("object_setClass"), SourceLocation(), S.LookupOrdinaryName); if (ObjectSetClass) { SourceLocation RHSLocEnd = S.getLocForEndOfToken(RHS->getEndLoc()); S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_assign) << FixItHint::CreateInsertion(OIRE->getBeginLoc(), "object_setClass(") << FixItHint::CreateReplacement( SourceRange(OIRE->getOpLoc(), AssignLoc), ",") << FixItHint::CreateInsertion(RHSLocEnd, ")"); } else S.Diag(OIRE->getLocation(), diag::warn_objc_isa_assign); } else { NamedDecl *ObjectGetClass = S.LookupSingleName(S.TUScope, &S.Context.Idents.get("object_getClass"), SourceLocation(), S.LookupOrdinaryName); if (ObjectGetClass) S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_use) << FixItHint::CreateInsertion(OIRE->getBeginLoc(), "object_getClass(") << FixItHint::CreateReplacement( SourceRange(OIRE->getOpLoc(), OIRE->getEndLoc()), ")"); else S.Diag(OIRE->getLocation(), diag::warn_objc_isa_use); } S.Diag(IV->getLocation(), diag::note_ivar_decl); } } } ExprResult Sema::DefaultLvalueConversion(Expr *E) { // Handle any placeholder expressions which made it here. if (E->hasPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } // C++ [conv.lval]p1: // A glvalue of a non-function, non-array type T can be // converted to a prvalue. if (!E->isGLValue()) return E; QualType T = E->getType(); assert(!T.isNull() && "r-value conversion on typeless expression?"); // lvalue-to-rvalue conversion cannot be applied to function or array types. if (T->isFunctionType() || T->isArrayType()) return E; // We don't want to throw lvalue-to-rvalue casts on top of // expressions of certain types in C++. 
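// [Illustrative aside, not part of the original source] What the
// CheckForNullPointerDereference helper above warns about, using hypothetical
// functions:
//
//   int g() { return *(int *)0; }          // warning: indirection of non-volatile
//                                          // null pointer will be deleted, not trap
//   int h() { return *(volatile int *)0; } // no warning: the volatile access is kept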
if (getLangOpts().CPlusPlus && (E->getType() == Context.OverloadTy || T->isDependentType() || T->isRecordType())) return E; // The C standard is actually really unclear on this point, and // DR106 tells us what the result should be but not why. It's // generally best to say that void types just doesn't undergo // lvalue-to-rvalue at all. Note that expressions of unqualified // 'void' type are never l-values, but qualified void can be. if (T->isVoidType()) return E; // OpenCL usually rejects direct accesses to values of 'half' type. if (getLangOpts().OpenCL && !getOpenCLOptions().isAvailableOption("cl_khr_fp16", getLangOpts()) && T->isHalfType()) { Diag(E->getExprLoc(), diag::err_opencl_half_load_store) << 0 << T; return ExprError(); } CheckForNullPointerDereference(*this, E); if (const ObjCIsaExpr *OISA = dyn_cast(E->IgnoreParenCasts())) { NamedDecl *ObjectGetClass = LookupSingleName(TUScope, &Context.Idents.get("object_getClass"), SourceLocation(), LookupOrdinaryName); if (ObjectGetClass) Diag(E->getExprLoc(), diag::warn_objc_isa_use) << FixItHint::CreateInsertion(OISA->getBeginLoc(), "object_getClass(") << FixItHint::CreateReplacement( SourceRange(OISA->getOpLoc(), OISA->getIsaMemberLoc()), ")"); else Diag(E->getExprLoc(), diag::warn_objc_isa_use); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(E->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, SourceLocation(), /* Expr*/nullptr); // C++ [conv.lval]p1: // [...] If T is a non-class type, the type of the prvalue is the // cv-unqualified version of T. Otherwise, the type of the // rvalue is T. // // C99 6.3.2.1p2: // If the lvalue has qualified type, the value has the unqualified // version of the type of the lvalue; otherwise, the value has the // type of the lvalue. if (T.hasQualifiers()) T = T.getUnqualifiedType(); // Under the MS ABI, lock down the inheritance model now. if (T->isMemberPointerType() && Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(E->getExprLoc(), T); ExprResult Res = CheckLValueToRValueConversionOperand(E); if (Res.isInvalid()) return Res; E = Res.get(); // Loading a __weak object implicitly retains the value, so we need a cleanup to // balance that. if (E->getType().getObjCLifetime() == Qualifiers::OCL_Weak) Cleanup.setExprNeedsCleanups(true); if (E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct) Cleanup.setExprNeedsCleanups(true); // C++ [conv.lval]p3: // If T is cv std::nullptr_t, the result is a null pointer constant. CastKind CK = T->isNullPtrType() ? CK_NullToPointer : CK_LValueToRValue; Res = ImplicitCastExpr::Create(Context, T, CK, E, nullptr, VK_PRValue, CurFPFeatureOverrides()); // C11 6.3.2.1p2: // ... if the lvalue has atomic type, the value has the non-atomic version // of the type of the lvalue ... if (const AtomicType *Atomic = T->getAs()) { T = Atomic->getValueType().getUnqualifiedType(); Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(), nullptr, VK_PRValue, FPOptionsOverride()); } return Res; } ExprResult Sema::DefaultFunctionArrayLvalueConversion(Expr *E, bool Diagnose) { ExprResult Res = DefaultFunctionArrayConversion(E, Diagnose); if (Res.isInvalid()) return ExprError(); Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res; } /// CallExprUnaryConversions - a special case of an unary conversion /// performed on a function designator of a call expression. 
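// [Illustrative aside, not part of the original source] The distinction drawn
// by the function below, using hypothetical declarations:
//
//   void callee(int);
//   void caller(void (*fp)(int)) {
//     callee(1); // function lvalue: a function-to-pointer decay is inserted
//     fp(2);     // already a pointer to function: only lvalue conversion applies
//   }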
ExprResult Sema::CallExprUnaryConversions(Expr *E) { QualType Ty = E->getType(); ExprResult Res = E; // Only do implicit cast for a function type, but not for a pointer // to function type. if (Ty->isFunctionType()) { Res = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay); if (Res.isInvalid()) return ExprError(); } Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res.get(); } /// UsualUnaryConversions - Performs various conversions that are common to most /// operators (C99 6.3). The conversions of array and function types are /// sometimes suppressed. For example, the array->pointer conversion doesn't /// apply if the array is an argument to the sizeof or address (&) operators. /// In these instances, this routine should *not* be called. ExprResult Sema::UsualUnaryConversions(Expr *E) { // First, convert to an r-value. ExprResult Res = DefaultFunctionArrayLvalueConversion(E); if (Res.isInvalid()) return ExprError(); E = Res.get(); QualType Ty = E->getType(); assert(!Ty.isNull() && "UsualUnaryConversions - missing type"); LangOptions::FPEvalMethodKind EvalMethod = CurFPFeatures.getFPEvalMethod(); if (EvalMethod != LangOptions::FEM_Source && Ty->isFloatingType() && (getLangOpts().getFPEvalMethod() != LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine || PP.getLastFPEvalPragmaLocation().isValid())) { switch (EvalMethod) { default: llvm_unreachable("Unrecognized float evaluation method"); break; case LangOptions::FEM_UnsetOnCommandLine: llvm_unreachable("Float evaluation method should be set by now"); break; case LangOptions::FEM_Double: if (Context.getFloatingTypeOrder(Context.DoubleTy, Ty) > 0) // Widen the expression to double. return Ty->isComplexType() ? ImpCastExprToType(E, Context.getComplexType(Context.DoubleTy), CK_FloatingComplexCast) : ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast); break; case LangOptions::FEM_Extended: if (Context.getFloatingTypeOrder(Context.LongDoubleTy, Ty) > 0) // Widen the expression to long double. return Ty->isComplexType() ? ImpCastExprToType( E, Context.getComplexType(Context.LongDoubleTy), CK_FloatingComplexCast) : ImpCastExprToType(E, Context.LongDoubleTy, CK_FloatingCast); break; } } // Half FP have to be promoted to float unless it is natively supported if (Ty->isHalfType() && !getLangOpts().NativeHalfType) return ImpCastExprToType(Res.get(), Context.FloatTy, CK_FloatingCast); // Try to perform integral promotions if the object has a theoretically // promotable type. if (Ty->isIntegralOrUnscopedEnumerationType()) { // C99 6.3.1.1p2: // // The following may be used in an expression wherever an int or // unsigned int may be used: // - an object or expression with an integer type whose integer // conversion rank is less than or equal to the rank of int // and unsigned int. // - A bit-field of type _Bool, int, signed int, or unsigned int. // // If an int can represent all values of the original type, the // value is converted to an int; otherwise, it is converted to an // unsigned int. These are called the integer promotions. All // other types are unchanged by the integer promotions. QualType PTy = Context.isPromotableBitField(E); if (!PTy.isNull()) { E = ImpCastExprToType(E, PTy, CK_IntegralCast).get(); return E; } if (Ty->isPromotableIntegerType()) { QualType PT = Context.getPromotedIntegerType(Ty); E = ImpCastExprToType(E, PT, CK_IntegralCast).get(); return E; } } return E; } /// DefaultArgumentPromotion (C99 6.5.2.2p6). 
Used for function calls that /// do not have a prototype. Arguments that have type float or __fp16 /// are promoted to double. All other argument types are converted by /// UsualUnaryConversions(). ExprResult Sema::DefaultArgumentPromotion(Expr *E) { QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultArgumentPromotion - missing type"); ExprResult Res = UsualUnaryConversions(E); if (Res.isInvalid()) return ExprError(); E = Res.get(); // If this is a 'float' or '__fp16' (CVR qualified or typedef) // promote to double. // Note that default argument promotion applies only to float (and // half/fp16); it does not apply to _Float16. const BuiltinType *BTy = Ty->getAs(); if (BTy && (BTy->getKind() == BuiltinType::Half || BTy->getKind() == BuiltinType::Float)) { if (getLangOpts().OpenCL && !getOpenCLOptions().isAvailableOption("cl_khr_fp64", getLangOpts())) { if (BTy->getKind() == BuiltinType::Half) { E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get(); } } else { E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get(); } } if (BTy && getLangOpts().getExtendIntArgs() == LangOptions::ExtendArgsKind::ExtendTo64 && Context.getTargetInfo().supportsExtendIntArgs() && Ty->isIntegerType() && Context.getTypeSizeInChars(BTy) < Context.getTypeSizeInChars(Context.LongLongTy)) { E = (Ty->isUnsignedIntegerType()) ? ImpCastExprToType(E, Context.UnsignedLongLongTy, CK_IntegralCast) .get() : ImpCastExprToType(E, Context.LongLongTy, CK_IntegralCast).get(); assert(8 == Context.getTypeSizeInChars(Context.LongLongTy).getQuantity() && "Unexpected typesize for LongLongTy"); } // C++ performs lvalue-to-rvalue conversion as a default argument // promotion, even on class types, but note: // C++11 [conv.lval]p2: // When an lvalue-to-rvalue conversion occurs in an unevaluated // operand or a subexpression thereof the value contained in the // referenced object is not accessed. Otherwise, if the glvalue // has a class type, the conversion copy-initializes a temporary // of type T from the glvalue and the result of the conversion // is a prvalue for the temporary. // FIXME: add some way to gate this entire thing for correctness in // potentially potentially evaluated contexts. if (getLangOpts().CPlusPlus && E->isGLValue() && !isUnevaluatedContext()) { ExprResult Temp = PerformCopyInitialization( InitializedEntity::InitializeTemporary(E->getType()), E->getExprLoc(), E); if (Temp.isInvalid()) return ExprError(); E = Temp.get(); } return E; } /// Determine the degree of POD-ness for an expression. /// Incomplete types are considered POD, since this check can be performed /// when we're in an unevaluated context. Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) { if (Ty->isIncompleteType()) { // C++11 [expr.call]p7: // After these conversions, if the argument does not have arithmetic, // enumeration, pointer, pointer to member, or class type, the program // is ill-formed. // // Since we've already performed array-to-pointer and function-to-pointer // decay, the only such type in C++ is cv void. This also handles // initializer lists as variadic arguments. 
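// [Illustrative aside, not part of the original source] Default argument
// promotion for a hypothetical variadic callee:
//
//   void vlog(const char *fmt, ...);
//   vlog("%d %f", (short)1, 2.0f); // (short)1 is promoted to int,
//                                  // 2.0f is promoted to double
//
// As noted above, the float-to-double promotion covers float and __fp16 but
// not _Float16, and OpenCL without cl_khr_fp64 only promotes half to float.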
if (Ty->isVoidType()) return VAK_Invalid; if (Ty->isObjCObjectType()) return VAK_Invalid; return VAK_Valid; } if (Ty.isDestructedType() == QualType::DK_nontrivial_c_struct) return VAK_Invalid; if (Ty.isCXX98PODType(Context)) return VAK_Valid; // C++11 [expr.call]p7: // Passing a potentially-evaluated argument of class type (Clause 9) // having a non-trivial copy constructor, a non-trivial move constructor, // or a non-trivial destructor, with no corresponding parameter, // is conditionally-supported with implementation-defined semantics. if (getLangOpts().CPlusPlus11 && !Ty->isDependentType()) if (CXXRecordDecl *Record = Ty->getAsCXXRecordDecl()) if (!Record->hasNonTrivialCopyConstructor() && !Record->hasNonTrivialMoveConstructor() && !Record->hasNonTrivialDestructor()) return VAK_ValidInCXX11; if (getLangOpts().ObjCAutoRefCount && Ty->isObjCLifetimeType()) return VAK_Valid; if (Ty->isObjCObjectType()) return VAK_Invalid; if (getLangOpts().MSVCCompat) return VAK_MSVCUndefined; // FIXME: In C++11, these cases are conditionally-supported, meaning we're // permitted to reject them. We should consider doing so. return VAK_Undefined; } void Sema::checkVariadicArgument(const Expr *E, VariadicCallType CT) { // Don't allow one to pass an Objective-C interface to a vararg. const QualType &Ty = E->getType(); VarArgKind VAK = isValidVarArgType(Ty); // Complain about passing non-POD types through varargs. switch (VAK) { case VAK_ValidInCXX11: DiagRuntimeBehavior( E->getBeginLoc(), nullptr, PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg) << Ty << CT); LLVM_FALLTHROUGH; case VAK_Valid: if (Ty->isRecordType()) { // This is unlikely to be what the user intended. If the class has a // 'c_str' member function, the user probably meant to call that. DiagRuntimeBehavior(E->getBeginLoc(), nullptr, PDiag(diag::warn_pass_class_arg_to_vararg) << Ty << CT << hasCStrMethod(E) << ".c_str()"); } break; case VAK_Undefined: case VAK_MSVCUndefined: DiagRuntimeBehavior(E->getBeginLoc(), nullptr, PDiag(diag::warn_cannot_pass_non_pod_arg_to_vararg) << getLangOpts().CPlusPlus11 << Ty << CT); break; case VAK_Invalid: if (Ty.isDestructedType() == QualType::DK_nontrivial_c_struct) Diag(E->getBeginLoc(), diag::err_cannot_pass_non_trivial_c_struct_to_vararg) << Ty << CT; else if (Ty->isObjCObjectType()) DiagRuntimeBehavior(E->getBeginLoc(), nullptr, PDiag(diag::err_cannot_pass_objc_interface_to_vararg) << Ty << CT); else Diag(E->getBeginLoc(), diag::err_cannot_pass_to_vararg) << isa(E) << Ty << CT; break; } } /// DefaultVariadicArgumentPromotion - Like DefaultArgumentPromotion, but /// will create a trap if the resulting type is not a POD type. ExprResult Sema::DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT, FunctionDecl *FDecl) { if (const BuiltinType *PlaceholderTy = E->getType()->getAsPlaceholderType()) { // Strip the unbridged-cast placeholder expression off, if applicable. if (PlaceholderTy->getKind() == BuiltinType::ARCUnbridgedCast && (CT == VariadicMethod || (FDecl && FDecl->hasAttr()))) { E = stripARCUnbridgedCast(E); // Otherwise, do normal placeholder checking. } else { ExprResult ExprRes = CheckPlaceholderExpr(E); if (ExprRes.isInvalid()) return ExprError(); E = ExprRes.get(); } } ExprResult ExprRes = DefaultArgumentPromotion(E); if (ExprRes.isInvalid()) return ExprError(); // Copy blocks to the heap. 
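// [Illustrative aside, not part of the original source] The variadic-argument
// diagnostics above in practice, with a hypothetical logger:
//
//   #include <string>
//   void vlog(const char *fmt, ...);
//   void test(const std::string &name) {
//     vlog("%s", name);          // warning: cannot pass object of non-trivial type
//                                // 'std::string' through variadic function
//     vlog("%s", name.c_str());  // OK; the hasCStrMethod() check above is what
//                                // drives the ".c_str()" suggestion
//   }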
if (ExprRes.get()->getType()->isBlockPointerType()) maybeExtendBlockObject(ExprRes); E = ExprRes.get(); // Diagnostics regarding non-POD argument types are // emitted along with format string checking in Sema::CheckFunctionCall(). if (isValidVarArgType(E->getType()) == VAK_Undefined) { // Turn this into a trap. CXXScopeSpec SS; SourceLocation TemplateKWLoc; UnqualifiedId Name; Name.setIdentifier(PP.getIdentifierInfo("__builtin_trap"), E->getBeginLoc()); ExprResult TrapFn = ActOnIdExpression(TUScope, SS, TemplateKWLoc, Name, /*HasTrailingLParen=*/true, /*IsAddressOfOperand=*/false); if (TrapFn.isInvalid()) return ExprError(); ExprResult Call = BuildCallExpr(TUScope, TrapFn.get(), E->getBeginLoc(), None, E->getEndLoc()); if (Call.isInvalid()) return ExprError(); ExprResult Comma = ActOnBinOp(TUScope, E->getBeginLoc(), tok::comma, Call.get(), E); if (Comma.isInvalid()) return ExprError(); return Comma.get(); } if (!getLangOpts().CPlusPlus && RequireCompleteType(E->getExprLoc(), E->getType(), diag::err_call_incomplete_argument)) return ExprError(); return E; } /// Converts an integer to complex float type. Helper function of /// UsualArithmeticConversions() /// /// \return false if the integer expression is an integer type and is /// successfully converted to the complex type. static bool handleIntegerToComplexFloatConversion(Sema &S, ExprResult &IntExpr, ExprResult &ComplexExpr, QualType IntTy, QualType ComplexTy, bool SkipCast) { if (IntTy->isComplexType() || IntTy->isRealFloatingType()) return true; if (SkipCast) return false; if (IntTy->isIntegerType()) { QualType fpTy = cast(ComplexTy)->getElementType(); IntExpr = S.ImpCastExprToType(IntExpr.get(), fpTy, CK_IntegralToFloating); IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy, CK_FloatingRealToComplex); } else { assert(IntTy->isComplexIntegerType()); IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy, CK_IntegralComplexToFloatingComplex); } return false; } /// Handle arithmetic conversion with complex types. Helper function of /// UsualArithmeticConversions() static QualType handleComplexFloatConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { // if we have an integer operand, the result is the complex type. if (!handleIntegerToComplexFloatConversion(S, RHS, LHS, RHSType, LHSType, /*skipCast*/false)) return LHSType; if (!handleIntegerToComplexFloatConversion(S, LHS, RHS, LHSType, RHSType, /*skipCast*/IsCompAssign)) return RHSType; // This handles complex/complex, complex/float, or float/complex. // When both operands are complex, the shorter operand is converted to the // type of the longer, and that is the type of the result. This corresponds // to what is done when combining two real floating-point operands. // The fun begins when size promotion occur across type domains. // From H&S 6.3.4: When one operand is complex and the other is a real // floating-point type, the less precise type is converted, within it's // real or complex domain, to the precision of the other type. For example, // when combining a "long double" with a "double _Complex", the // "double _Complex" is promoted to "long double _Complex". // Compute the rank of the two types, regardless of whether they are complex. int Order = S.Context.getFloatingTypeOrder(LHSType, RHSType); auto *LHSComplexType = dyn_cast(LHSType); auto *RHSComplexType = dyn_cast(RHSType); QualType LHSElementType = LHSComplexType ? LHSComplexType->getElementType() : LHSType; QualType RHSElementType = RHSComplexType ? 
RHSComplexType->getElementType() : RHSType; QualType ResultType = S.Context.getComplexType(LHSElementType); if (Order < 0) { // Promote the precision of the LHS if not an assignment. ResultType = S.Context.getComplexType(RHSElementType); if (!IsCompAssign) { if (LHSComplexType) LHS = S.ImpCastExprToType(LHS.get(), ResultType, CK_FloatingComplexCast); else LHS = S.ImpCastExprToType(LHS.get(), RHSElementType, CK_FloatingCast); } } else if (Order > 0) { // Promote the precision of the RHS. if (RHSComplexType) RHS = S.ImpCastExprToType(RHS.get(), ResultType, CK_FloatingComplexCast); else RHS = S.ImpCastExprToType(RHS.get(), LHSElementType, CK_FloatingCast); } return ResultType; } /// Handle arithmetic conversion from integer to float. Helper function /// of UsualArithmeticConversions() static QualType handleIntToFloatConversion(Sema &S, ExprResult &FloatExpr, ExprResult &IntExpr, QualType FloatTy, QualType IntTy, bool ConvertFloat, bool ConvertInt) { if (IntTy->isIntegerType()) { if (ConvertInt) // Convert intExpr to the lhs floating point type. IntExpr = S.ImpCastExprToType(IntExpr.get(), FloatTy, CK_IntegralToFloating); return FloatTy; } // Convert both sides to the appropriate complex float. assert(IntTy->isComplexIntegerType()); QualType result = S.Context.getComplexType(FloatTy); // _Complex int -> _Complex float if (ConvertInt) IntExpr = S.ImpCastExprToType(IntExpr.get(), result, CK_IntegralComplexToFloatingComplex); // float -> _Complex float if (ConvertFloat) FloatExpr = S.ImpCastExprToType(FloatExpr.get(), result, CK_FloatingRealToComplex); return result; } /// Handle arithmethic conversion with floating point types. Helper /// function of UsualArithmeticConversions() static QualType handleFloatConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { bool LHSFloat = LHSType->isRealFloatingType(); bool RHSFloat = RHSType->isRealFloatingType(); // N1169 4.1.4: If one of the operands has a floating type and the other // operand has a fixed-point type, the fixed-point operand // is converted to the floating type [...] if (LHSType->isFixedPointType() || RHSType->isFixedPointType()) { if (LHSFloat) RHS = S.ImpCastExprToType(RHS.get(), LHSType, CK_FixedPointToFloating); else if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), RHSType, CK_FixedPointToFloating); return LHSFloat ? LHSType : RHSType; } // If we have two real floating types, convert the smaller operand // to the bigger result. if (LHSFloat && RHSFloat) { int order = S.Context.getFloatingTypeOrder(LHSType, RHSType); if (order > 0) { RHS = S.ImpCastExprToType(RHS.get(), LHSType, CK_FloatingCast); return LHSType; } assert(order < 0 && "illegal float comparison"); if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), RHSType, CK_FloatingCast); return RHSType; } if (LHSFloat) { // Half FP has to be promoted to float unless it is natively supported if (LHSType->isHalfType() && !S.getLangOpts().NativeHalfType) LHSType = S.Context.FloatTy; return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType, /*ConvertFloat=*/!IsCompAssign, /*ConvertInt=*/ true); } assert(RHSFloat); return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType, /*ConvertFloat=*/ true, /*ConvertInt=*/!IsCompAssign); } /// Diagnose attempts to convert between __float128, __ibm128 and /// long double if there is no support for such conversion. /// Helper function of UsualArithmeticConversions(). 
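// [Illustrative aside, not part of the original source] The floating-point
// half of the usual arithmetic conversions handled above:
//
//   float f = 1.0f; double d = 2.0; long double ld = 3.0L;
//   auto a = f + d;   // f is converted to double; the result type is double
//   auto b = d + ld;  // d is converted to long double
//
// unsupportedTypeConversion() below rejects mixes such as __float128 with
// long double on targets where long double uses the PPC double-double
// format, because that conversion is not currently supported.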
static bool unsupportedTypeConversion(const Sema &S, QualType LHSType, QualType RHSType) { // No issue if either is not a floating point type. if (!LHSType->isFloatingType() || !RHSType->isFloatingType()) return false; // No issue if both have the same 128-bit float semantics. auto *LHSComplex = LHSType->getAs(); auto *RHSComplex = RHSType->getAs(); QualType LHSElem = LHSComplex ? LHSComplex->getElementType() : LHSType; QualType RHSElem = RHSComplex ? RHSComplex->getElementType() : RHSType; const llvm::fltSemantics &LHSSem = S.Context.getFloatTypeSemantics(LHSElem); const llvm::fltSemantics &RHSSem = S.Context.getFloatTypeSemantics(RHSElem); if ((&LHSSem != &llvm::APFloat::PPCDoubleDouble() || &RHSSem != &llvm::APFloat::IEEEquad()) && (&LHSSem != &llvm::APFloat::IEEEquad() || &RHSSem != &llvm::APFloat::PPCDoubleDouble())) return false; return true; } typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType); namespace { /// These helper callbacks are placed in an anonymous namespace to /// permit their use as function template parameters. ExprResult doIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, toType, CK_IntegralCast); } ExprResult doComplexIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, S.Context.getComplexType(toType), CK_IntegralComplexCast); } } /// Handle integer arithmetic conversions. Helper function of /// UsualArithmeticConversions() template static QualType handleIntegerConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { // The rules for this case are in C99 6.3.1.8 int order = S.Context.getIntegerTypeOrder(LHSType, RHSType); bool LHSSigned = LHSType->hasSignedIntegerRepresentation(); bool RHSSigned = RHSType->hasSignedIntegerRepresentation(); if (LHSSigned == RHSSigned) { // Same signedness; use the higher-ranked type if (order >= 0) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (order != (LHSSigned ? 1 : -1)) { // The unsigned type has greater than or equal rank to the // signed type, so use the unsigned type if (RHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (S.Context.getIntWidth(LHSType) != S.Context.getIntWidth(RHSType)) { // The two types are different widths; if we are here, that // means the signed type is larger than the unsigned type, so // use the signed type. if (LHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else { // The signed type is higher-ranked than the unsigned type, // but isn't actually any bigger (like unsigned int and long // on most 32-bit systems). Use the unsigned type corresponding // to the signed type. QualType result = S.Context.getCorrespondingUnsignedType(LHSSigned ? LHSType : RHSType); RHS = (*doRHSCast)(S, RHS.get(), result); if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), result); return result; } } /// Handle conversions with GCC complex int extension. 
Helper function /// of UsualArithmeticConversions() static QualType handleComplexIntConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { const ComplexType *LHSComplexInt = LHSType->getAsComplexIntegerType(); const ComplexType *RHSComplexInt = RHSType->getAsComplexIntegerType(); if (LHSComplexInt && RHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSEltType, IsCompAssign); return S.Context.getComplexType(ScalarType); } if (LHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); RHS = S.ImpCastExprToType(RHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } assert(RHSComplexInt); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSType, RHSEltType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } /// Return the rank of a given fixed point or integer type. The value itself /// doesn't matter, but the values must be increasing with proper increasing /// rank as described in N1169 4.1.1. static unsigned GetFixedPointRank(QualType Ty) { const auto *BTy = Ty->getAs(); assert(BTy && "Expected a builtin type."); switch (BTy->getKind()) { case BuiltinType::ShortFract: case BuiltinType::UShortFract: case BuiltinType::SatShortFract: case BuiltinType::SatUShortFract: return 1; case BuiltinType::Fract: case BuiltinType::UFract: case BuiltinType::SatFract: case BuiltinType::SatUFract: return 2; case BuiltinType::LongFract: case BuiltinType::ULongFract: case BuiltinType::SatLongFract: case BuiltinType::SatULongFract: return 3; case BuiltinType::ShortAccum: case BuiltinType::UShortAccum: case BuiltinType::SatShortAccum: case BuiltinType::SatUShortAccum: return 4; case BuiltinType::Accum: case BuiltinType::UAccum: case BuiltinType::SatAccum: case BuiltinType::SatUAccum: return 5; case BuiltinType::LongAccum: case BuiltinType::ULongAccum: case BuiltinType::SatLongAccum: case BuiltinType::SatULongAccum: return 6; default: if (BTy->isInteger()) return 0; llvm_unreachable("Unexpected fixed point or integer type"); } } /// handleFixedPointConversion - Fixed point operations between fixed /// point types and integers or other fixed point types do not fall under /// usual arithmetic conversion since these conversions could result in loss /// of precsision (N1169 4.1.4). These operations should be calculated with /// the full precision of their result type (N1169 4.1.6.2.1). 
static QualType handleFixedPointConversion(Sema &S, QualType LHSTy, QualType RHSTy) { assert((LHSTy->isFixedPointType() || RHSTy->isFixedPointType()) && "Expected at least one of the operands to be a fixed point type"); assert((LHSTy->isFixedPointOrIntegerType() || RHSTy->isFixedPointOrIntegerType()) && "Special fixed point arithmetic operation conversions are only " "applied to ints or other fixed point types"); // If one operand has signed fixed-point type and the other operand has // unsigned fixed-point type, then the unsigned fixed-point operand is // converted to its corresponding signed fixed-point type and the resulting // type is the type of the converted operand. if (RHSTy->isSignedFixedPointType() && LHSTy->isUnsignedFixedPointType()) LHSTy = S.Context.getCorrespondingSignedFixedPointType(LHSTy); else if (RHSTy->isUnsignedFixedPointType() && LHSTy->isSignedFixedPointType()) RHSTy = S.Context.getCorrespondingSignedFixedPointType(RHSTy); // The result type is the type with the highest rank, whereby a fixed-point // conversion rank is always greater than an integer conversion rank; if the // type of either of the operands is a saturating fixedpoint type, the result // type shall be the saturating fixed-point type corresponding to the type // with the highest rank; the resulting value is converted (taking into // account rounding and overflow) to the precision of the resulting type. // Same ranks between signed and unsigned types are resolved earlier, so both // types are either signed or both unsigned at this point. unsigned LHSTyRank = GetFixedPointRank(LHSTy); unsigned RHSTyRank = GetFixedPointRank(RHSTy); QualType ResultTy = LHSTyRank > RHSTyRank ? LHSTy : RHSTy; if (LHSTy->isSaturatedFixedPointType() || RHSTy->isSaturatedFixedPointType()) ResultTy = S.Context.getCorrespondingSaturatedType(ResultTy); return ResultTy; } /// Check that the usual arithmetic conversions can be performed on this pair of /// expressions that might be of enumeration type. static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS, SourceLocation Loc, Sema::ArithConvKind ACK) { // C++2a [expr.arith.conv]p1: // If one operand is of enumeration type and the other operand is of a // different enumeration type or a floating-point type, this behavior is // deprecated ([depr.arith.conv.enum]). // // Warn on this in all language modes. Produce a deprecation warning in C++20. // Eventually we will presumably reject these cases (in C++23 onwards?). QualType L = LHS->getType(), R = RHS->getType(); bool LEnum = L->isUnscopedEnumerationType(), REnum = R->isUnscopedEnumerationType(); bool IsCompAssign = ACK == Sema::ACK_CompAssign; if ((!IsCompAssign && LEnum && R->isFloatingType()) || (REnum && L->isFloatingType())) { S.Diag(Loc, S.getLangOpts().CPlusPlus20 ? diag::warn_arith_conv_enum_float_cxx20 : diag::warn_arith_conv_enum_float) << LHS->getSourceRange() << RHS->getSourceRange() << (int)ACK << LEnum << L << R; } else if (!IsCompAssign && LEnum && REnum && !S.Context.hasSameUnqualifiedType(L, R)) { unsigned DiagID; if (!L->castAs()->getDecl()->hasNameForLinkage() || !R->castAs()->getDecl()->hasNameForLinkage()) { // If either enumeration type is unnamed, it's less likely that the // user cares about this, but this situation is still deprecated in // C++2a. Use a different warning group. DiagID = S.getLangOpts().CPlusPlus20 ? 
diag::warn_arith_conv_mixed_anon_enum_types_cxx20 : diag::warn_arith_conv_mixed_anon_enum_types; } else if (ACK == Sema::ACK_Conditional) { // Conditional expressions are separated out because they have // historically had a different warning flag. DiagID = S.getLangOpts().CPlusPlus20 ? diag::warn_conditional_mixed_enum_types_cxx20 : diag::warn_conditional_mixed_enum_types; } else if (ACK == Sema::ACK_Comparison) { // Comparison expressions are separated out because they have // historically had a different warning flag. DiagID = S.getLangOpts().CPlusPlus20 ? diag::warn_comparison_mixed_enum_types_cxx20 : diag::warn_comparison_mixed_enum_types; } else { DiagID = S.getLangOpts().CPlusPlus20 ? diag::warn_arith_conv_mixed_enum_types_cxx20 : diag::warn_arith_conv_mixed_enum_types; } S.Diag(Loc, DiagID) << LHS->getSourceRange() << RHS->getSourceRange() << (int)ACK << L << R; } } /// UsualArithmeticConversions - Performs various conversions that are common to /// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this /// routine returns the first non-arithmetic type found. The client is /// responsible for emitting appropriate error diagnostics. QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, ArithConvKind ACK) { checkEnumArithmeticConversions(*this, LHS.get(), RHS.get(), Loc, ACK); if (ACK != ACK_CompAssign) { LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); // For conversion purposes, we ignore any atomic qualifier on the LHS. if (const AtomicType *AtomicLHS = LHSType->getAs()) LHSType = AtomicLHS->getValueType(); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // If either side is a non-arithmetic type (e.g. a pointer), we are done. // The caller can deal with this (e.g. pointer + int). if (!LHSType->isArithmeticType() || !RHSType->isArithmeticType()) return QualType(); // Apply unary and bitfield promotions to the LHS's type. QualType LHSUnpromotedType = LHSType; if (LHSType->isPromotableIntegerType()) LHSType = Context.getPromotedIntegerType(LHSType); QualType LHSBitfieldPromoteTy = Context.isPromotableBitField(LHS.get()); if (!LHSBitfieldPromoteTy.isNull()) LHSType = LHSBitfieldPromoteTy; if (LHSType != LHSUnpromotedType && ACK != ACK_CompAssign) LHS = ImpCastExprToType(LHS.get(), LHSType, CK_IntegralCast); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // At this point, we have two different arithmetic types. // Diagnose attempts to convert between __ibm128, __float128 and long double // where such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSType, RHSType)) return QualType(); // Handle complex types first (C99 6.3.1.8p1). if (LHSType->isComplexType() || RHSType->isComplexType()) return handleComplexFloatConversion(*this, LHS, RHS, LHSType, RHSType, ACK == ACK_CompAssign); // Now handle "real" floating types (i.e. float, double, long double). 
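// [Illustrative aside, not part of the original source] The enumeration
// arithmetic cases diagnosed by checkEnumArithmeticConversions above:
//
//   enum E1 { A }; enum E2 { B };
//   int    x = A + B;    // C++20: arithmetic between different enumeration
//                        // types is deprecated ([depr.arith.conv.enum])
//   double y = A + 1.0;  // C++20: enum/floating-point arithmetic, likewise
//
// In earlier language modes the corresponding non-deprecation warning groups
// are used instead, as the diagnostic selection above shows.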
if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) return handleFloatConversion(*this, LHS, RHS, LHSType, RHSType, ACK == ACK_CompAssign); // Handle GCC complex int extension. if (LHSType->isComplexIntegerType() || RHSType->isComplexIntegerType()) return handleComplexIntConversion(*this, LHS, RHS, LHSType, RHSType, ACK == ACK_CompAssign); if (LHSType->isFixedPointType() || RHSType->isFixedPointType()) return handleFixedPointConversion(*this, LHSType, RHSType); // Finally, we have two differing integer types. return handleIntegerConversion (*this, LHS, RHS, LHSType, RHSType, ACK == ACK_CompAssign); } //===----------------------------------------------------------------------===// // Semantic Analysis for various Expression Types //===----------------------------------------------------------------------===// ExprResult Sema::ActOnGenericSelectionExpr(SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc, Expr *ControllingExpr, ArrayRef ArgTypes, ArrayRef ArgExprs) { unsigned NumAssocs = ArgTypes.size(); assert(NumAssocs == ArgExprs.size()); TypeSourceInfo **Types = new TypeSourceInfo*[NumAssocs]; for (unsigned i = 0; i < NumAssocs; ++i) { if (ArgTypes[i]) (void) GetTypeFromParser(ArgTypes[i], &Types[i]); else Types[i] = nullptr; } ExprResult ER = CreateGenericSelectionExpr(KeyLoc, DefaultLoc, RParenLoc, ControllingExpr, llvm::makeArrayRef(Types, NumAssocs), ArgExprs); delete [] Types; return ER; } ExprResult Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc, Expr *ControllingExpr, ArrayRef Types, ArrayRef Exprs) { unsigned NumAssocs = Types.size(); assert(NumAssocs == Exprs.size()); // Decay and strip qualifiers for the controlling expression type, and handle // placeholder type replacement. See committee discussion from WG14 DR423. { EnterExpressionEvaluationContext Unevaluated( *this, Sema::ExpressionEvaluationContext::Unevaluated); ExprResult R = DefaultFunctionArrayLvalueConversion(ControllingExpr); if (R.isInvalid()) return ExprError(); ControllingExpr = R.get(); } bool TypeErrorFound = false, IsResultDependent = ControllingExpr->isTypeDependent(), ContainsUnexpandedParameterPack = ControllingExpr->containsUnexpandedParameterPack(); // The controlling expression is an unevaluated operand, so side effects are // likely unintended. if (!inTemplateInstantiation() && !IsResultDependent && ControllingExpr->HasSideEffects(Context, false)) Diag(ControllingExpr->getExprLoc(), diag::warn_side_effects_unevaluated_context); for (unsigned i = 0; i < NumAssocs; ++i) { if (Exprs[i]->containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (Types[i]) { if (Types[i]->getType()->containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (Types[i]->getType()->isDependentType()) { IsResultDependent = true; } else { // C11 6.5.1.1p2 "The type name in a generic association shall specify a // complete object type other than a variably modified type." unsigned D = 0; if (Types[i]->getType()->isIncompleteType()) D = diag::err_assoc_type_incomplete; else if (!Types[i]->getType()->isObjectType()) D = diag::err_assoc_type_nonobject; else if (Types[i]->getType()->isVariablyModifiedType()) D = diag::err_assoc_type_variably_modified; else { // Because the controlling expression undergoes lvalue conversion, // array conversion, and function conversion, an association which is // of array type, function type, or is qualified can never be // reached. 
We will warn about this so users are less surprised by // the unreachable association. However, we don't have to handle // function types; that's not an object type, so it's handled above. // // The logic is somewhat different for C++ because C++ has different // lvalue to rvalue conversion rules than C. [conv.lvalue]p1 says, // If T is a non-class type, the type of the prvalue is the cv- // unqualified version of T. Otherwise, the type of the prvalue is T. // The result of these rules is that all qualified types in an // association in C are unreachable, and in C++, only qualified non- // class types are unreachable. unsigned Reason = 0; QualType QT = Types[i]->getType(); if (QT->isArrayType()) Reason = 1; else if (QT.hasQualifiers() && (!LangOpts.CPlusPlus || !QT->isRecordType())) Reason = 2; if (Reason) Diag(Types[i]->getTypeLoc().getBeginLoc(), diag::warn_unreachable_association) << QT << (Reason - 1); } if (D != 0) { Diag(Types[i]->getTypeLoc().getBeginLoc(), D) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } // C11 6.5.1.1p2 "No two generic associations in the same generic // selection shall specify compatible types." for (unsigned j = i+1; j < NumAssocs; ++j) if (Types[j] && !Types[j]->getType()->isDependentType() && Context.typesAreCompatible(Types[i]->getType(), Types[j]->getType())) { Diag(Types[j]->getTypeLoc().getBeginLoc(), diag::err_assoc_compatible_types) << Types[j]->getTypeLoc().getSourceRange() << Types[j]->getType() << Types[i]->getType(); Diag(Types[i]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } } } } if (TypeErrorFound) return ExprError(); // If we determined that the generic selection is result-dependent, don't // try to compute the result expression. if (IsResultDependent) return GenericSelectionExpr::Create(Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack); SmallVector CompatIndices; unsigned DefaultIndex = -1U; // Look at the canonical type of the controlling expression in case it was a // deduced type like __auto_type. However, when issuing diagnostics, use the // type the user wrote in source rather than the canonical one. for (unsigned i = 0; i < NumAssocs; ++i) { if (!Types[i]) DefaultIndex = i; else if (Context.typesAreCompatible( ControllingExpr->getType().getCanonicalType(), Types[i]->getType())) CompatIndices.push_back(i); } // C11 6.5.1.1p2 "The controlling expression of a generic selection shall have // type compatible with at most one of the types named in its generic // association list." if (CompatIndices.size() > 1) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getBeginLoc(), diag::err_generic_sel_multi_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType() << (unsigned)CompatIndices.size(); for (unsigned I : CompatIndices) { Diag(Types[I]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[I]->getTypeLoc().getSourceRange() << Types[I]->getType(); } return ExprError(); } // C11 6.5.1.1p2 "If a generic selection has no default generic association, // its controlling expression shall have type compatible with exactly one of // the types named in its generic association list." 
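// [Illustrative aside, not part of the original source] A C11 generic
// selection of the sort checked above, with a hypothetical macro:
//
//   #define type_name(x) _Generic((x), int: "int", float: "float", default: "other")
//   const char *s = type_name(1.0f);  // selects the 'float' association
//
// Because the controlling expression is lvalue-converted first (WG14 DR423),
// a 'const int' argument also selects the plain 'int' association, and in C a
// 'const int' association itself would be warned about as unreachable above.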
if (DefaultIndex == -1U && CompatIndices.size() == 0) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getBeginLoc(), diag::err_generic_sel_no_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType(); return ExprError(); } // C11 6.5.1.1p3 "If a generic selection has a generic association with a // type name that is compatible with the type of the controlling expression, // then the result expression of the generic selection is the expression // in that generic association. Otherwise, the result expression of the // generic selection is the expression in the default generic association." unsigned ResultIndex = CompatIndices.size() ? CompatIndices[0] : DefaultIndex; return GenericSelectionExpr::Create( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack, ResultIndex); } /// getUDSuffixLoc - Create a SourceLocation for a ud-suffix, given the /// location of the token and the offset of the ud-suffix within it. static SourceLocation getUDSuffixLoc(Sema &S, SourceLocation TokLoc, unsigned Offset) { return Lexer::AdvanceToTokenCharacter(TokLoc, Offset, S.getSourceManager(), S.getLangOpts()); } /// BuildCookedLiteralOperatorCall - A user-defined literal was found. Look up /// the corresponding cooked (non-raw) literal operator, and build a call to it. static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope, IdentifierInfo *UDSuffix, SourceLocation UDSuffixLoc, ArrayRef Args, SourceLocation LitEndLoc) { assert(Args.size() <= 2 && "too many arguments for literal operator"); QualType ArgTy[2]; for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) { ArgTy[ArgIdx] = Args[ArgIdx]->getType(); if (ArgTy[ArgIdx]->isArrayType()) ArgTy[ArgIdx] = S.Context.getArrayDecayedType(ArgTy[ArgIdx]); } DeclarationName OpName = S.Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); LookupResult R(S, OpName, UDSuffixLoc, Sema::LookupOrdinaryName); if (S.LookupLiteralOperator(Scope, R, llvm::makeArrayRef(ArgTy, Args.size()), /*AllowRaw*/ false, /*AllowTemplate*/ false, /*AllowStringTemplatePack*/ false, /*DiagnoseMissing*/ true) == Sema::LOLR_Error) return ExprError(); return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from /// multiple tokens. However, the common case is that StringToks points to one /// string. 
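// [Illustrative aside, not part of the original source] The pasted-string
// handling described in the comment above:
//
//   const char    *p = "foo" "bar";   // a single literal, "foobar"
//   const wchar_t *q = "foo" L"bar";  // the encoding prefix propagates: L"foobar"
//
// Concatenating two different encoding prefixes (e.g. u"" with U"") is
// rejected earlier, by the StringLiteralParser.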
/// ExprResult Sema::ActOnStringLiteral(ArrayRef StringToks, Scope *UDLScope) { assert(!StringToks.empty() && "Must have at least one string!"); StringLiteralParser Literal(StringToks, PP); if (Literal.hadError) return ExprError(); SmallVector StringTokLocs; for (const Token &Tok : StringToks) StringTokLocs.push_back(Tok.getLocation()); QualType CharTy = Context.CharTy; StringLiteral::StringKind Kind = StringLiteral::Ordinary; if (Literal.isWide()) { CharTy = Context.getWideCharType(); Kind = StringLiteral::Wide; } else if (Literal.isUTF8()) { if (getLangOpts().Char8) CharTy = Context.Char8Ty; Kind = StringLiteral::UTF8; } else if (Literal.isUTF16()) { CharTy = Context.Char16Ty; Kind = StringLiteral::UTF16; } else if (Literal.isUTF32()) { CharTy = Context.Char32Ty; Kind = StringLiteral::UTF32; } else if (Literal.isPascal()) { CharTy = Context.UnsignedCharTy; } // Warn on initializing an array of char from a u8 string literal; this // becomes ill-formed in C++2a. if (getLangOpts().CPlusPlus && !getLangOpts().CPlusPlus20 && !getLangOpts().Char8 && Kind == StringLiteral::UTF8) { Diag(StringTokLocs.front(), diag::warn_cxx20_compat_utf8_string); // Create removals for all 'u8' prefixes in the string literal(s). This // ensures C++2a compatibility (but may change the program behavior when // built by non-Clang compilers for which the execution character set is // not always UTF-8). auto RemovalDiag = PDiag(diag::note_cxx20_compat_utf8_string_remove_u8); SourceLocation RemovalDiagLoc; for (const Token &Tok : StringToks) { if (Tok.getKind() == tok::utf8_string_literal) { if (RemovalDiagLoc.isInvalid()) RemovalDiagLoc = Tok.getLocation(); RemovalDiag << FixItHint::CreateRemoval(CharSourceRange::getCharRange( Tok.getLocation(), Lexer::AdvanceToTokenCharacter(Tok.getLocation(), 2, getSourceManager(), getLangOpts()))); } } Diag(RemovalDiagLoc, RemovalDiag); } QualType StrTy = Context.getStringLiteralArrayType(CharTy, Literal.GetNumStringChars()); // Pass &StringTokLocs[0], StringTokLocs.size() to factory! StringLiteral *Lit = StringLiteral::Create(Context, Literal.GetString(), Kind, Literal.Pascal, StrTy, &StringTokLocs[0], StringTokLocs.size()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()], Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. 
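// [Illustrative aside, not part of the original source] The "cooked" string
// literal operator form that the lookup below resolves to (LOLR_Cooked),
// using a hypothetical suffix:
//
//   #include <cstddef>
//   constexpr std::size_t operator""_len(const char *, std::size_t n) { return n; }
//   static_assert("hello"_len == 5, "string UDLs receive (pointer, length)");
//
// The LOLR_StringTemplatePack case below handles the character-pack template
// form (the N3599-style extension).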
if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl)); // C++11 [lex.ext]p5: The literal L is treated as a call of the form // operator "" X (str, len) QualType SizeType = Context.getSizeType(); DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); QualType ArgTy[] = { Context.getArrayDecayedType(StrTy), SizeType }; LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, ArgTy, /*AllowRaw*/ false, /*AllowTemplate*/ true, /*AllowStringTemplatePack*/ true, /*DiagnoseMissing*/ true, Lit)) { case LOLR_Cooked: { llvm::APInt Len(Context.getIntWidth(SizeType), Literal.GetNumStringChars()); IntegerLiteral *LenArg = IntegerLiteral::Create(Context, Len, SizeType, StringTokLocs[0]); Expr *Args[] = { Lit, LenArg }; return BuildLiteralOperatorCall(R, OpNameInfo, Args, StringTokLocs.back()); } case LOLR_Template: { TemplateArgumentListInfo ExplicitArgs; TemplateArgument Arg(Lit); TemplateArgumentLocInfo ArgInfo(Lit); ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); return BuildLiteralOperatorCall(R, OpNameInfo, None, StringTokLocs.back(), &ExplicitArgs); } case LOLR_StringTemplatePack: { TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(CharTy); bool CharIsUnsigned = CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); TemplateArgument TypeArg(CharTy); TemplateArgumentLocInfo TypeArgInfo(Context.getTrivialTypeSourceInfo(CharTy)); ExplicitArgs.addArgument(TemplateArgumentLoc(TypeArg, TypeArgInfo)); for (unsigned I = 0, N = Lit->getLength(); I != N; ++I) { Value = Lit->getCodeUnit(I); TemplateArgument Arg(Context, Value, CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, StringTokLocs.back(), &ExplicitArgs); } case LOLR_Raw: case LOLR_ErrorNoDiagnostic: llvm_unreachable("unexpected literal operator lookup result"); case LOLR_Error: return ExprError(); } llvm_unreachable("unexpected literal operator lookup result"); } DeclRefExpr * Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, SourceLocation Loc, const CXXScopeSpec *SS) { DeclarationNameInfo NameInfo(D->getDeclName(), Loc); return BuildDeclRefExpr(D, Ty, VK, NameInfo, SS); } DeclRefExpr * Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, const DeclarationNameInfo &NameInfo, const CXXScopeSpec *SS, NamedDecl *FoundD, SourceLocation TemplateKWLoc, const TemplateArgumentListInfo *TemplateArgs) { NestedNameSpecifierLoc NNS = SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(); return BuildDeclRefExpr(D, Ty, VK, NameInfo, NNS, FoundD, TemplateKWLoc, TemplateArgs); } // CUDA/HIP: Check whether a captured reference variable is referencing a // host variable in a device or host device lambda. static bool isCapturingReferenceToHostVarInCUDADeviceLambda(const Sema &S, VarDecl *VD) { if (!S.getLangOpts().CUDA || !VD->hasInit()) return false; assert(VD->getType()->isReferenceType()); // Check whether the reference variable is referencing a host variable. auto *DRE = dyn_cast(VD->getInit()); if (!DRE) return false; auto *Referee = dyn_cast(DRE->getDecl()); if (!Referee || !Referee->hasGlobalStorage() || Referee->hasAttr()) return false; // Check whether the current function is a device or host device lambda. 
// Check whether the reference variable is a capture by getDeclContext() // since refersToEnclosingVariableOrCapture() is not ready at this point. auto *MD = dyn_cast_or_null(S.CurContext); if (MD && MD->getParent()->isLambda() && MD->getOverloadedOperator() == OO_Call && MD->hasAttr() && VD->getDeclContext() != MD) return true; return false; } NonOdrUseReason Sema::getNonOdrUseReasonInCurrentContext(ValueDecl *D) { // A declaration named in an unevaluated operand never constitutes an odr-use. if (isUnevaluatedContext()) return NOUR_Unevaluated; // C++2a [basic.def.odr]p4: // A variable x whose name appears as a potentially-evaluated expression e // is odr-used by e unless [...] x is a reference that is usable in // constant expressions. // CUDA/HIP: // If a reference variable referencing a host variable is captured in a // device or host device lambda, the value of the referee must be copied // to the capture and the reference variable must be treated as odr-use // since the value of the referee is not known at compile time and must // be loaded from the captured. if (VarDecl *VD = dyn_cast(D)) { if (VD->getType()->isReferenceType() && !(getLangOpts().OpenMP && isOpenMPCapturedDecl(D)) && !isCapturingReferenceToHostVarInCUDADeviceLambda(*this, VD) && VD->isUsableInConstantExpressions(Context)) return NOUR_Constant; } // All remaining non-variable cases constitute an odr-use. For variables, we // need to wait and see how the expression is used. return NOUR_None; } /// BuildDeclRefExpr - Build an expression that references a /// declaration that does not require a closure capture. DeclRefExpr * Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, const DeclarationNameInfo &NameInfo, NestedNameSpecifierLoc NNS, NamedDecl *FoundD, SourceLocation TemplateKWLoc, const TemplateArgumentListInfo *TemplateArgs) { bool RefersToCapturedVariable = isa(D) && NeedToCaptureVariable(cast(D), NameInfo.getLoc()); DeclRefExpr *E = DeclRefExpr::Create( Context, NNS, TemplateKWLoc, D, RefersToCapturedVariable, NameInfo, Ty, VK, FoundD, TemplateArgs, getNonOdrUseReasonInCurrentContext(D)); MarkDeclRefReferenced(E); // C++ [except.spec]p17: // An exception-specification is considered to be needed when: // - in an expression, the function is the unique lookup result or // the selected member of a set of overloaded functions. // // We delay doing this until after we've built the function reference and // marked it as used so that: // a) if the function is defaulted, we get errors from defining it before / // instead of errors from computing its exception specification, and // b) if the function is a defaulted comparison, we can use the body we // build when defining it as input to the exception specification // computation rather than computing a new body. if (auto *FPT = Ty->getAs()) { if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) { if (auto *NewFPT = ResolveExceptionSpec(NameInfo.getLoc(), FPT)) E->setType(Context.getQualifiedType(NewFPT, Ty.getQualifiers())); } } if (getLangOpts().ObjCWeak && isa(D) && Ty.getObjCLifetime() == Qualifiers::OCL_Weak && !isUnevaluatedContext() && !Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, E->getBeginLoc())) getCurFunction()->recordUseOfWeak(E); FieldDecl *FD = dyn_cast(D); if (IndirectFieldDecl *IFD = dyn_cast(D)) FD = IFD->getAnonField(); if (FD) { UnusedPrivateFields.remove(FD); // Just in case we're building an illegal pointer-to-member. if (FD->isBitField()) E->setObjectKind(OK_BitField); } // C++ [expr.prim]/8: The expression [...] 
is a bit-field if the identifier // designates a bit-field. if (auto *BD = dyn_cast(D)) if (auto *BE = BD->getBinding()) E->setObjectKind(BE->getObjectKind()); return E; } /// Decomposes the given name into a DeclarationNameInfo, its location, and /// possibly a list of template arguments. /// /// If this produces template arguments, it is permitted to call /// DecomposeTemplateName. /// /// This actually loses a lot of source location information for /// non-standard name kinds; we should consider preserving that in /// some way. void Sema::DecomposeUnqualifiedId(const UnqualifiedId &Id, TemplateArgumentListInfo &Buffer, DeclarationNameInfo &NameInfo, const TemplateArgumentListInfo *&TemplateArgs) { if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId) { Buffer.setLAngleLoc(Id.TemplateId->LAngleLoc); Buffer.setRAngleLoc(Id.TemplateId->RAngleLoc); ASTTemplateArgsPtr TemplateArgsPtr(Id.TemplateId->getTemplateArgs(), Id.TemplateId->NumArgs); translateTemplateArguments(TemplateArgsPtr, Buffer); TemplateName TName = Id.TemplateId->Template.get(); SourceLocation TNameLoc = Id.TemplateId->TemplateNameLoc; NameInfo = Context.getNameForTemplate(TName, TNameLoc); TemplateArgs = &Buffer; } else { NameInfo = GetNameFromUnqualifiedId(Id); TemplateArgs = nullptr; } } static void emitEmptyLookupTypoDiagnostic( const TypoCorrection &TC, Sema &SemaRef, const CXXScopeSpec &SS, DeclarationName Typo, SourceLocation TypoLoc, ArrayRef Args, unsigned DiagnosticID, unsigned DiagnosticSuggestID) { DeclContext *Ctx = SS.isEmpty() ? nullptr : SemaRef.computeDeclContext(SS, false); if (!TC) { // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) if (Ctx) SemaRef.Diag(TypoLoc, diag::err_no_member) << Typo << Ctx << SS.getRange(); else SemaRef.Diag(TypoLoc, DiagnosticID) << Typo; return; } std::string CorrectedStr = TC.getAsString(SemaRef.getLangOpts()); bool DroppedSpecifier = TC.WillReplaceSpecifier() && Typo.getAsString() == CorrectedStr; unsigned NoteID = TC.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (!Ctx) SemaRef.diagnoseTypo(TC, SemaRef.PDiag(DiagnosticSuggestID) << Typo, SemaRef.PDiag(NoteID)); else SemaRef.diagnoseTypo(TC, SemaRef.PDiag(diag::err_no_member_suggest) << Typo << Ctx << DroppedSpecifier << SS.getRange(), SemaRef.PDiag(NoteID)); } /// Diagnose a lookup that found results in an enclosing class during error /// recovery. This usually indicates that the results were found in a dependent /// base class that could not be searched as part of a template definition. /// Always issues a diagnostic (though this may be only a warning in MS /// compatibility mode). /// /// Return \c true if the error is unrecoverable, or \c false if the caller /// should attempt to recover using these lookup results. bool Sema::DiagnoseDependentMemberLookup(LookupResult &R) { // During a default argument instantiation the CurContext points // to a CXXMethodDecl; but we can't apply a this-> fixit inside a // function parameter list, hence add an explicit check. 
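  // Roughly the kind of code this recovers from (names are hypothetical):
  //
  //   struct Impl { void update(Impl); };
  //   template <typename T> struct Wrapper : T {
  //     void g() { update(T{}); }    // lookup deferred (dependent argument)
  //   };
  //   void use(Wrapper<Impl> &W) {
  //     W.g();                       // instantiation finds 'update' only in the
  //   }                              //   dependent base; diagnosed here, with a
  //                                  //   'this->' fix-it in instance methods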
bool isDefaultArgument = !CodeSynthesisContexts.empty() && CodeSynthesisContexts.back().Kind == CodeSynthesisContext::DefaultFunctionArgumentInstantiation; CXXMethodDecl *CurMethod = dyn_cast(CurContext); bool isInstance = CurMethod && CurMethod->isInstance() && R.getNamingClass() == CurMethod->getParent() && !isDefaultArgument; // There are two ways we can find a class-scope declaration during template // instantiation that we did not find in the template definition: if it is a // member of a dependent base class, or if it is declared after the point of // use in the same class. Distinguish these by comparing the class in which // the member was found to the naming class of the lookup. unsigned DiagID = diag::err_found_in_dependent_base; unsigned NoteID = diag::note_member_declared_at; if (R.getRepresentativeDecl()->getDeclContext()->Equals(R.getNamingClass())) { DiagID = getLangOpts().MSVCCompat ? diag::ext_found_later_in_class : diag::err_found_later_in_class; } else if (getLangOpts().MSVCCompat) { DiagID = diag::ext_found_in_dependent_base; NoteID = diag::note_dependent_member_use; } if (isInstance) { // Give a code modification hint to insert 'this->'. Diag(R.getNameLoc(), DiagID) << R.getLookupName() << FixItHint::CreateInsertion(R.getNameLoc(), "this->"); CheckCXXThisCapture(R.getNameLoc()); } else { // FIXME: Add a FixItHint to insert 'Base::' or 'Derived::' (assuming // they're not shadowed). Diag(R.getNameLoc(), DiagID) << R.getLookupName(); } for (NamedDecl *D : R) Diag(D->getLocation(), NoteID); // Return true if we are inside a default argument instantiation // and the found name refers to an instance member function, otherwise // the caller will try to create an implicit member call and this is wrong // for default arguments. // // FIXME: Is this special case necessary? We could allow the caller to // diagnose this. if (isDefaultArgument && ((*R.begin())->isCXXInstanceMember())) { Diag(R.getNameLoc(), diag::err_member_call_without_object); return true; } // Tell the callee to try to recover. return false; } /// Diagnose an empty lookup. /// /// \return false if new lookup candidates were found bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, CorrectionCandidateCallback &CCC, TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef Args, TypoExpr **Out) { DeclarationName Name = R.getLookupName(); unsigned diagnostic = diag::err_undeclared_var_use; unsigned diagnostic_suggest = diag::err_undeclared_var_use_suggest; if (Name.getNameKind() == DeclarationName::CXXOperatorName || Name.getNameKind() == DeclarationName::CXXLiteralOperatorName || Name.getNameKind() == DeclarationName::CXXConversionFunctionName) { diagnostic = diag::err_undeclared_use; diagnostic_suggest = diag::err_undeclared_use_suggest; } // If the original lookup was an unqualified lookup, fake an // unqualified lookup. This is useful when (for example) the // original lookup would not have found something because it was a // dependent name. DeclContext *DC = SS.isEmpty() ? CurContext : nullptr; while (DC) { if (isa(DC)) { LookupQualifiedName(R, DC); if (!R.empty()) { // Don't give errors about ambiguities in this lookup. R.suppressDiagnostics(); // If there's a best viable function among the results, only mention // that one in the notes. 
OverloadCandidateSet Candidates(R.getNameLoc(), OverloadCandidateSet::CSK_Normal); AddOverloadedCallCandidates(R, ExplicitTemplateArgs, Args, Candidates); OverloadCandidateSet::iterator Best; if (Candidates.BestViableFunction(*this, R.getNameLoc(), Best) == OR_Success) { R.clear(); R.addDecl(Best->FoundDecl.getDecl(), Best->FoundDecl.getAccess()); R.resolveKind(); } return DiagnoseDependentMemberLookup(R); } R.clear(); } DC = DC->getLookupParent(); } // We didn't find anything, so try to correct for a typo. TypoCorrection Corrected; if (S && Out) { SourceLocation TypoLoc = R.getNameLoc(); assert(!ExplicitTemplateArgs && "Diagnosing an empty lookup with explicit template args!"); *Out = CorrectTypoDelayed( R.getLookupNameInfo(), R.getLookupKind(), S, &SS, CCC, [=](const TypoCorrection &TC) { emitEmptyLookupTypoDiagnostic(TC, *this, SS, Name, TypoLoc, Args, diagnostic, diagnostic_suggest); }, nullptr, CTK_ErrorRecovery); if (*Out) return true; } else if (S && (Corrected = CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, &SS, CCC, CTK_ErrorRecovery))) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); bool DroppedSpecifier = Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr; R.setLookupName(Corrected.getCorrection()); bool AcceptableWithRecovery = false; bool AcceptableWithoutRecovery = false; NamedDecl *ND = Corrected.getFoundDecl(); if (ND) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(R.getNameLoc(), OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionTemplateDecl *FTD = dyn_cast(CD)) AddTemplateOverloadCandidate( FTD, DeclAccessPair::make(FTD, AS_none), ExplicitTemplateArgs, Args, OCS); else if (FunctionDecl *FD = dyn_cast(CD)) if (!ExplicitTemplateArgs || ExplicitTemplateArgs->size() == 0) AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(*this, R.getNameLoc(), Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: // FIXME: Arbitrarily pick the first declaration for the note. Corrected.setCorrectionDecl(ND); break; } } R.addDecl(ND); if (getLangOpts().CPlusPlus && ND->isCXXClassMember()) { CXXRecordDecl *Record = nullptr; if (Corrected.getCorrectionSpecifier()) { const Type *Ty = Corrected.getCorrectionSpecifier()->getAsType(); Record = Ty->getAsCXXRecordDecl(); } if (!Record) Record = cast( ND->getDeclContext()->getRedeclContext()); R.setNamingClass(Record); } auto *UnderlyingND = ND->getUnderlyingDecl(); AcceptableWithRecovery = isa(UnderlyingND) || isa(UnderlyingND); // FIXME: If we ended up with a typo for a type name or // Objective-C class name, we're in trouble because the parser // is in the wrong place to recover. Suggest the typo // correction, but don't make it a fix-it since we're not going // to recover well anyway. AcceptableWithoutRecovery = isa(UnderlyingND) || getAsTypeTemplateDecl(UnderlyingND) || isa(UnderlyingND); } else { // FIXME: We found a keyword. Suggest it, but don't provide a fix-it // because we aren't able to recover. AcceptableWithoutRecovery = true; } if (AcceptableWithRecovery || AcceptableWithoutRecovery) { unsigned NoteID = Corrected.getCorrectionDeclAs() ? 
diag::note_implicit_param_decl : diag::note_previous_decl; if (SS.isEmpty()) diagnoseTypo(Corrected, PDiag(diagnostic_suggest) << Name, PDiag(NoteID), AcceptableWithRecovery); else diagnoseTypo(Corrected, PDiag(diag::err_no_member_suggest) << Name << computeDeclContext(SS, false) << DroppedSpecifier << SS.getRange(), PDiag(NoteID), AcceptableWithRecovery); // Tell the callee whether to try to recover. return !AcceptableWithRecovery; } } R.clear(); // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) if (!SS.isEmpty()) { Diag(R.getNameLoc(), diag::err_no_member) << Name << computeDeclContext(SS, false) << SS.getRange(); return true; } // Give up, we can't recover. Diag(R.getNameLoc(), diagnostic) << Name; return true; } /// In Microsoft mode, if we are inside a template class whose parent class has /// dependent base classes, and we can't resolve an unqualified identifier, then /// assume the identifier is a member of a dependent base class. We can only /// recover successfully in static methods, instance methods, and other contexts /// where 'this' is available. This doesn't precisely match MSVC's /// instantiation model, but it's close enough. static Expr * recoverFromMSUnqualifiedLookup(Sema &S, ASTContext &Context, DeclarationNameInfo &NameInfo, SourceLocation TemplateKWLoc, const TemplateArgumentListInfo *TemplateArgs) { // Only try to recover from lookup into dependent bases in static methods or // contexts where 'this' is available. QualType ThisType = S.getCurrentThisType(); const CXXRecordDecl *RD = nullptr; if (!ThisType.isNull()) RD = ThisType->getPointeeType()->getAsCXXRecordDecl(); else if (auto *MD = dyn_cast(S.CurContext)) RD = MD->getParent(); if (!RD || !RD->hasAnyDependentBases()) return nullptr; // Diagnose this as unqualified lookup into a dependent base class. If 'this' // is available, suggest inserting 'this->' as a fixit. SourceLocation Loc = NameInfo.getLoc(); auto DB = S.Diag(Loc, diag::ext_undeclared_unqual_id_with_dependent_base); DB << NameInfo.getName() << RD; if (!ThisType.isNull()) { DB << FixItHint::CreateInsertion(Loc, "this->"); return CXXDependentScopeMemberExpr::Create( Context, /*This=*/nullptr, ThisType, /*IsArrow=*/true, /*Op=*/SourceLocation(), NestedNameSpecifierLoc(), TemplateKWLoc, /*FirstQualifierFoundInScope=*/nullptr, NameInfo, TemplateArgs); } // Synthesize a fake NNS that points to the derived class. This will // perform name lookup during template instantiation. CXXScopeSpec SS; auto *NNS = NestedNameSpecifier::Create(Context, nullptr, true, RD->getTypeForDecl()); SS.MakeTrivial(Context, NNS, SourceRange(Loc, Loc)); return DependentScopeDeclRefExpr::Create( Context, SS.getWithLocInContext(Context), TemplateKWLoc, NameInfo, TemplateArgs); } ExprResult Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS, SourceLocation TemplateKWLoc, UnqualifiedId &Id, bool HasTrailingLParen, bool IsAddressOfOperand, CorrectionCandidateCallback *CCC, bool IsInlineAsmIdentifier, Token *KeywordReplacement) { assert(!(IsAddressOfOperand && HasTrailingLParen) && "cannot be direct & operand and have a trailing lparen"); if (SS.isInvalid()) return ExprError(); TemplateArgumentListInfo TemplateArgsBuffer; // Decompose the UnqualifiedId into the following data. 
DeclarationNameInfo NameInfo; const TemplateArgumentListInfo *TemplateArgs; DecomposeUnqualifiedId(Id, TemplateArgsBuffer, NameInfo, TemplateArgs); DeclarationName Name = NameInfo.getName(); IdentifierInfo *II = Name.getAsIdentifierInfo(); SourceLocation NameLoc = NameInfo.getLoc(); if (II && II->isEditorPlaceholder()) { // FIXME: When typed placeholders are supported we can create a typed // placeholder expression node. return ExprError(); } // C++ [temp.dep.expr]p3: // An id-expression is type-dependent if it contains: // -- an identifier that was declared with a dependent type, // (note: handled after lookup) // -- a template-id that is dependent, // (note: handled in BuildTemplateIdExpr) // -- a conversion-function-id that specifies a dependent type, // -- a nested-name-specifier that contains a class-name that // names a dependent type. // Determine whether this is a member of an unknown specialization; // we need to handle these differently. bool DependentID = false; if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName && Name.getCXXNameType()->isDependentType()) { DependentID = true; } else if (SS.isSet()) { if (DeclContext *DC = computeDeclContext(SS, false)) { if (RequireCompleteDeclContext(SS, DC)) return ExprError(); } else { DependentID = true; } } if (DependentID) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // Perform the required lookup. LookupResult R(*this, NameInfo, (Id.getKind() == UnqualifiedIdKind::IK_ImplicitSelfParam) ? LookupObjCImplicitSelfParam : LookupOrdinaryName); if (TemplateKWLoc.isValid() || TemplateArgs) { // Lookup the template name again to correctly establish the context in // which it was found. This is really unfortunate as we already did the // lookup to determine that it was a template name in the first place. If // this becomes a performance hit, we can work harder to preserve those // results until we get here but it's likely not worth it. bool MemberOfUnknownSpecialization; AssumedTemplateKind AssumedTemplate; if (LookupTemplateName(R, S, SS, QualType(), /*EnteringContext=*/false, MemberOfUnknownSpecialization, TemplateKWLoc, &AssumedTemplate)) return ExprError(); if (MemberOfUnknownSpecialization || (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation)) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); } else { bool IvarLookupFollowUp = II && !SS.isSet() && getCurMethodDecl(); LookupParsedName(R, S, &SS, !IvarLookupFollowUp); // If the result might be in a dependent base class, this is a dependent // id-expression. if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // If this reference is in an Objective-C method, then we need to do // some special Objective-C lookup, too. if (IvarLookupFollowUp) { ExprResult E(LookupInObjCMethod(R, S, II, true)); if (E.isInvalid()) return ExprError(); if (Expr *Ex = E.getAs()) return Ex; } } if (R.isAmbiguous()) return ExprError(); // This could be an implicitly declared function reference if the language // mode allows it as a feature. if (R.empty() && HasTrailingLParen && II && getLangOpts().implicitFunctionsAllowed()) { NamedDecl *D = ImplicitlyDefineFunction(NameLoc, *II, S); if (D) R.addDecl(D); } // Determine whether this name might be a candidate for // argument-dependent lookup. 
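  // For example (hypothetical names), an unqualified call can still resolve via
  // argument-dependent lookup even though ordinary lookup found nothing here:
  //
  //   namespace N { struct S {}; void swap(S &, S &); }
  //   void f(N::S &A, N::S &B) { swap(A, B); }   // N::swap is found through ADL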
bool ADL = UseArgumentDependentLookup(SS, R, HasTrailingLParen); if (R.empty() && !ADL) { if (SS.isEmpty() && getLangOpts().MSVCCompat) { if (Expr *E = recoverFromMSUnqualifiedLookup(*this, Context, NameInfo, TemplateKWLoc, TemplateArgs)) return E; } // Don't diagnose an empty lookup for inline assembly. if (IsInlineAsmIdentifier) return ExprError(); // If this name wasn't predeclared and if this is not a function // call, diagnose the problem. TypoExpr *TE = nullptr; DefaultFilterCCC DefaultValidator(II, SS.isValid() ? SS.getScopeRep() : nullptr); DefaultValidator.IsAddressOfOperand = IsAddressOfOperand; assert((!CCC || CCC->IsAddressOfOperand == IsAddressOfOperand) && "Typo correction callback misconfigured"); if (CCC) { // Make sure the callback knows what the typo being diagnosed is. CCC->setTypoName(II); if (SS.isValid()) CCC->setTypoNNS(SS.getScopeRep()); } // FIXME: DiagnoseEmptyLookup produces bad diagnostics if we're looking for // a template name, but we happen to have always already looked up the name // before we get here if it must be a template name. if (DiagnoseEmptyLookup(S, SS, R, CCC ? *CCC : DefaultValidator, nullptr, None, &TE)) { if (TE && KeywordReplacement) { auto &State = getTypoExprState(TE); auto BestTC = State.Consumer->getNextCorrection(); if (BestTC.isKeyword()) { auto *II = BestTC.getCorrectionAsIdentifierInfo(); if (State.DiagHandler) State.DiagHandler(BestTC); KeywordReplacement->startToken(); KeywordReplacement->setKind(II->getTokenID()); KeywordReplacement->setIdentifierInfo(II); KeywordReplacement->setLocation(BestTC.getCorrectionRange().getBegin()); // Clean up the state associated with the TypoExpr, since it has // now been diagnosed (without a call to CorrectDelayedTyposInExpr). clearDelayedTypo(TE); // Signal that a correction to a keyword was performed by returning a // valid-but-null ExprResult. return (Expr*)nullptr; } State.Consumer->resetCorrectionStream(); } return TE ? TE : ExprError(); } assert(!R.empty() && "DiagnoseEmptyLookup returned false but added no results"); // If we found an Objective-C instance variable, let // LookupInObjCMethod build the appropriate expression to // reference the ivar. if (ObjCIvarDecl *Ivar = R.getAsSingle()) { R.clear(); ExprResult E(LookupInObjCMethod(R, S, Ivar->getIdentifier())); // In a hopelessly buggy code, Objective-C instance variable // lookup fails and no expression will be built to reference it. if (!E.isInvalid() && !E.get()) return ExprError(); return E; } } // This is guaranteed from this point on. assert(!R.empty() || ADL); // Check whether this might be a C++ implicit instance member access. // C++ [class.mfct.non-static]p3: // When an id-expression that is not part of a class member access // syntax and not used to form a pointer to member is used in the // body of a non-static member function of class X, if name lookup // resolves the name in the id-expression to a non-static non-type // member of some class C, the id-expression is transformed into a // class member access expression using (*this) as the // postfix-expression to the left of the . operator. // // But we don't actually need to do this for '&' operands if R // resolved to a function or overloaded function set, because the // expression is ill-formed if it actually works out to be a // non-static member function: // // C++ [expr.ref]p4: // Otherwise, if E1.E2 refers to a non-static member function. . . // [t]he expression can be used only as the left-hand operand of a // member function call. 
// // There are other safeguards against such uses, but it's important // to get this right here so that we don't end up making a // spuriously dependent expression if we're inside a dependent // instance method. if (!R.empty() && (*R.begin())->isCXXClassMember()) { bool MightBeImplicitMember; if (!IsAddressOfOperand) MightBeImplicitMember = true; else if (!SS.isEmpty()) MightBeImplicitMember = false; else if (R.isOverloadedResult()) MightBeImplicitMember = false; else if (R.isUnresolvableResult()) MightBeImplicitMember = true; else MightBeImplicitMember = isa(R.getFoundDecl()) || isa(R.getFoundDecl()) || isa(R.getFoundDecl()); if (MightBeImplicitMember) return BuildPossibleImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, S); } if (TemplateArgs || TemplateKWLoc.isValid()) { // In C++1y, if this is a variable template id, then check it // in BuildTemplateIdExpr(). // The single lookup result must be a variable template declaration. if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId && Id.TemplateId && Id.TemplateId->Kind == TNK_Var_template) { assert(R.getAsSingle() && "There should only be one declaration found."); } return BuildTemplateIdExpr(SS, TemplateKWLoc, R, ADL, TemplateArgs); } return BuildDeclarationNameExpr(SS, R, ADL); } /// BuildQualifiedDeclarationNameExpr - Build a C++ qualified /// declaration name, generally during template instantiation. /// There's a large number of things which don't need to be done along /// this path. ExprResult Sema::BuildQualifiedDeclarationNameExpr( CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, bool IsAddressOfOperand, const Scope *S, TypeSourceInfo **RecoveryTSI) { DeclContext *DC = computeDeclContext(SS, false); if (!DC) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (RequireCompleteDeclContext(SS, DC)) return ExprError(); LookupResult R(*this, NameInfo, LookupOrdinaryName); LookupQualifiedName(R, DC); if (R.isAmbiguous()) return ExprError(); if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (R.empty()) { // Don't diagnose problems with invalid record decl, the secondary no_member // diagnostic during template instantiation is likely bogus, e.g. if a class // is invalid because it's derived from an invalid base class, then missing // members were likely supposed to be inherited. if (const auto *CD = dyn_cast(DC)) if (CD->isInvalidDecl()) return ExprError(); Diag(NameInfo.getLoc(), diag::err_no_member) << NameInfo.getName() << DC << SS.getRange(); return ExprError(); } if (const TypeDecl *TD = R.getAsSingle()) { // Diagnose a missing typename if this resolved unambiguously to a type in // a dependent context. If we can recover with a type, downgrade this to // a warning in Microsoft compatibility mode. unsigned DiagID = diag::err_typename_missing; if (RecoveryTSI && getLangOpts().MSVCCompat) DiagID = diag::ext_typename_missing; SourceLocation Loc = SS.getBeginLoc(); auto D = Diag(Loc, DiagID); D << SS.getScopeRep() << NameInfo.getName().getAsString() << SourceRange(Loc, NameInfo.getEndLoc()); // Don't recover if the caller isn't expecting us to or if we're in a SFINAE // context. if (!RecoveryTSI) return ExprError(); // Only issue the fixit if we're prepared to recover. D << FixItHint::CreateInsertion(Loc, "typename "); // Recover by pretending this was an elaborated type. 
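  // Roughly the situation being diagnosed and recovered (hypothetical names):
  //
  //   template <typename T> void f() {
  //     T::type *P;   // parsed as an expression; once 'T::type' resolves to a
  //   }               //   type, err_typename_missing fires with a fix-it to
  //                   //   insert 'typename' (a warning under -fms-compatibility)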
    QualType Ty = Context.getTypeDeclType(TD);
    TypeLocBuilder TLB;
    TLB.pushTypeSpec(Ty).setNameLoc(NameInfo.getLoc());
    QualType ET = getElaboratedType(ETK_None, SS, Ty);
    ElaboratedTypeLoc QTL = TLB.push<ElaboratedTypeLoc>(ET);
    QTL.setElaboratedKeywordLoc(SourceLocation());
    QTL.setQualifierLoc(SS.getWithLocInContext(Context));

    *RecoveryTSI = TLB.getTypeSourceInfo(Context, ET);

    return ExprEmpty();
  }

  // Defend against this resolving to an implicit member access. We usually
  // won't get here if this might be a legitimate class member (we end up in
  // BuildMemberReferenceExpr instead), but this can be valid if we're forming
  // a pointer-to-member or in an unevaluated context in C++11.
  if (!R.empty() && (*R.begin())->isCXXClassMember() && !IsAddressOfOperand)
    return BuildPossibleImplicitMemberExpr(SS,
                                           /*TemplateKWLoc=*/SourceLocation(),
                                           R, /*TemplateArgs=*/nullptr, S);

  return BuildDeclarationNameExpr(SS, R, /* ADL */ false);
}

/// The parser has read a name in, and Sema has detected that we're currently
/// inside an ObjC method. Perform some additional checks and determine if we
/// should form a reference to an ivar.
///
/// Ideally, most of this would be done by lookup, but there's
/// actually quite a lot of extra work involved.
DeclResult Sema::LookupIvarInObjCMethod(LookupResult &Lookup, Scope *S,
                                        IdentifierInfo *II) {
  SourceLocation Loc = Lookup.getNameLoc();
  ObjCMethodDecl *CurMethod = getCurMethodDecl();

  // Check for error condition which is already reported.
  if (!CurMethod)
    return DeclResult(true);

  // There are two cases to handle here. 1) scoped lookup could have failed,
  // in which case we should look for an ivar. 2) scoped lookup could have
  // found a decl, but that decl is outside the current instance method (i.e.
  // a global variable). In these two cases, we do a lookup for an ivar with
  // this name; if the lookup succeeds, we replace it with our current decl.

  // If we're in a class method, we don't normally want to look for
  // ivars. But if we don't find anything else, and there's an
  // ivar, that's an error.
  bool IsClassMethod = CurMethod->isClassMethod();

  bool LookForIvars;
  if (Lookup.empty())
    LookForIvars = true;
  else if (IsClassMethod)
    LookForIvars = false;
  else
    LookForIvars = (Lookup.isSingleResult() &&
                    Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod());
  ObjCInterfaceDecl *IFace = nullptr;
  if (LookForIvars) {
    IFace = CurMethod->getClassInterface();
    ObjCInterfaceDecl *ClassDeclared;
    ObjCIvarDecl *IV = nullptr;
    if (IFace && (IV = IFace->lookupInstanceVariable(II, ClassDeclared))) {
      // Diagnose using an ivar in a class method.
      if (IsClassMethod) {
        Diag(Loc, diag::err_ivar_use_in_class_method) << IV->getDeclName();
        return DeclResult(true);
      }

      // Diagnose the use of an ivar outside of the declaring class.
      if (IV->getAccessControl() == ObjCIvarDecl::Private &&
          !declaresSameEntity(ClassDeclared, IFace) &&
          !getLangOpts().DebuggerSupport)
        Diag(Loc, diag::err_private_ivar_access) << IV->getDeclName();

      // Success.
      return IV;
    }
  } else if (CurMethod->isInstanceMethod()) {
    // We should warn if a local variable hides an ivar.
if (ObjCInterfaceDecl *IFace = CurMethod->getClassInterface()) { ObjCInterfaceDecl *ClassDeclared; if (ObjCIvarDecl *IV = IFace->lookupInstanceVariable(II, ClassDeclared)) { if (IV->getAccessControl() != ObjCIvarDecl::Private || declaresSameEntity(IFace, ClassDeclared)) Diag(Loc, diag::warn_ivar_use_hidden) << IV->getDeclName(); } } } else if (Lookup.isSingleResult() && Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod()) { // If accessing a stand-alone ivar in a class method, this is an error. if (const ObjCIvarDecl *IV = dyn_cast(Lookup.getFoundDecl())) { Diag(Loc, diag::err_ivar_use_in_class_method) << IV->getDeclName(); return DeclResult(true); } } // Didn't encounter an error, didn't find an ivar. return DeclResult(false); } ExprResult Sema::BuildIvarRefExpr(Scope *S, SourceLocation Loc, ObjCIvarDecl *IV) { ObjCMethodDecl *CurMethod = getCurMethodDecl(); assert(CurMethod && CurMethod->isInstanceMethod() && "should not reference ivar from this context"); ObjCInterfaceDecl *IFace = CurMethod->getClassInterface(); assert(IFace && "should not reference ivar from this context"); // If we're referencing an invalid decl, just return this as a silent // error node. The error diagnostic was already emitted on the decl. if (IV->isInvalidDecl()) return ExprError(); // Check if referencing a field with __attribute__((deprecated)). if (DiagnoseUseOfDecl(IV, Loc)) return ExprError(); // FIXME: This should use a new expr for a direct reference, don't // turn this into Self->ivar, just return a BareIVarExpr or something. IdentifierInfo &II = Context.Idents.get("self"); UnqualifiedId SelfName; SelfName.setImplicitSelfParam(&II); CXXScopeSpec SelfScopeSpec; SourceLocation TemplateKWLoc; ExprResult SelfExpr = ActOnIdExpression(S, SelfScopeSpec, TemplateKWLoc, SelfName, /*HasTrailingLParen=*/false, /*IsAddressOfOperand=*/false); if (SelfExpr.isInvalid()) return ExprError(); SelfExpr = DefaultLvalueConversion(SelfExpr.get()); if (SelfExpr.isInvalid()) return ExprError(); MarkAnyDeclReferenced(Loc, IV, true); ObjCMethodFamily MF = CurMethod->getMethodFamily(); if (MF != OMF_init && MF != OMF_dealloc && MF != OMF_finalize && !IvarBacksCurrentMethodAccessor(IFace, CurMethod, IV)) Diag(Loc, diag::warn_direct_ivar_access) << IV->getDeclName(); ObjCIvarRefExpr *Result = new (Context) ObjCIvarRefExpr(IV, IV->getUsageType(SelfExpr.get()->getType()), Loc, IV->getLocation(), SelfExpr.get(), true, true); if (IV->getType().getObjCLifetime() == Qualifiers::OCL_Weak) { if (!isUnevaluatedContext() && !Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, Loc)) getCurFunction()->recordUseOfWeak(Result); } if (getLangOpts().ObjCAutoRefCount) if (const BlockDecl *BD = CurContext->getInnermostBlockDecl()) ImplicitlyRetainedSelfLocs.push_back({Loc, BD}); return Result; } /// The parser has read a name in, and Sema has detected that we're currently /// inside an ObjC method. Perform some additional checks and determine if we /// should form a reference to an ivar. If so, build an expression referencing /// that ivar. ExprResult Sema::LookupInObjCMethod(LookupResult &Lookup, Scope *S, IdentifierInfo *II, bool AllowBuiltinCreation) { // FIXME: Integrate this lookup step into LookupParsedName. DeclResult Ivar = LookupIvarInObjCMethod(Lookup, S, II); if (Ivar.isInvalid()) return ExprError(); if (Ivar.isUsable()) return BuildIvarRefExpr(S, Lookup.getNameLoc(), cast(Ivar.get())); if (Lookup.empty() && II && AllowBuiltinCreation) LookupBuiltin(Lookup); // Sentinel value saying that we didn't do anything special. 
return ExprResult(false); } /// Cast a base object to a member's actual type. /// /// There are two relevant checks: /// /// C++ [class.access.base]p7: /// /// If a class member access operator [...] is used to access a non-static /// data member or non-static member function, the reference is ill-formed if /// the left operand [...] cannot be implicitly converted to a pointer to the /// naming class of the right operand. /// /// C++ [expr.ref]p7: /// /// If E2 is a non-static data member or a non-static member function, the /// program is ill-formed if the class of which E2 is directly a member is an /// ambiguous base (11.8) of the naming class (11.9.3) of E2. /// /// Note that the latter check does not consider access; the access of the /// "real" base class is checked as appropriate when checking the access of the /// member name. ExprResult Sema::PerformObjectMemberConversion(Expr *From, NestedNameSpecifier *Qualifier, NamedDecl *FoundDecl, NamedDecl *Member) { CXXRecordDecl *RD = dyn_cast(Member->getDeclContext()); if (!RD) return From; QualType DestRecordType; QualType DestType; QualType FromRecordType; QualType FromType = From->getType(); bool PointerConversions = false; if (isa(Member)) { DestRecordType = Context.getCanonicalType(Context.getTypeDeclType(RD)); auto FromPtrType = FromType->getAs(); DestRecordType = Context.getAddrSpaceQualType( DestRecordType, FromPtrType ? FromType->getPointeeType().getAddressSpace() : FromType.getAddressSpace()); if (FromPtrType) { DestType = Context.getPointerType(DestRecordType); FromRecordType = FromPtrType->getPointeeType(); PointerConversions = true; } else { DestType = DestRecordType; FromRecordType = FromType; } } else if (CXXMethodDecl *Method = dyn_cast(Member)) { if (Method->isStatic()) return From; DestType = Method->getThisType(); DestRecordType = DestType->getPointeeType(); if (FromType->getAs()) { FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { FromRecordType = FromType; DestType = DestRecordType; } LangAS FromAS = FromRecordType.getAddressSpace(); LangAS DestAS = DestRecordType.getAddressSpace(); if (FromAS != DestAS) { QualType FromRecordTypeWithoutAS = Context.removeAddrSpaceQualType(FromRecordType); QualType FromTypeWithDestAS = Context.getAddrSpaceQualType(FromRecordTypeWithoutAS, DestAS); if (PointerConversions) FromTypeWithDestAS = Context.getPointerType(FromTypeWithDestAS); From = ImpCastExprToType(From, FromTypeWithDestAS, CK_AddressSpaceConversion, From->getValueKind()) .get(); } } else { // No conversion necessary. return From; } if (DestType->isDependentType() || FromType->isDependentType()) return From; // If the unqualified types are the same, no conversion is necessary. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; SourceRange FromRange = From->getSourceRange(); SourceLocation FromLoc = FromRange.getBegin(); ExprValueKind VK = From->getValueKind(); // C++ [class.member.lookup]p8: // [...] Ambiguities can often be resolved by qualifying a name with its // class name. // // If the member was a qualified name and the qualified referred to a // specific base subobject type, we'll cast to that intermediate type // first and then to the object in which the member is declared. 
That allows // one to resolve ambiguities in, e.g., a diamond-shaped hierarchy such as: // // class Base { public: int x; }; // class Derived1 : public Base { }; // class Derived2 : public Base { }; // class VeryDerived : public Derived1, public Derived2 { void f(); }; // // void VeryDerived::f() { // x = 17; // error: ambiguous base subobjects // Derived1::x = 17; // okay, pick the Base subobject of Derived1 // } if (Qualifier && Qualifier->getAsType()) { QualType QType = QualType(Qualifier->getAsType(), 0); assert(QType->isRecordType() && "lookup done with non-record type"); QualType QRecordType = QualType(QType->castAs(), 0); // In C++98, the qualifier type doesn't actually have to be a base // type of the object type, in which case we just ignore it. // Otherwise build the appropriate casts. if (IsDerivedFrom(FromLoc, FromRecordType, QRecordType)) { CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, QRecordType, FromLoc, FromRange, &BasePath)) return ExprError(); if (PointerConversions) QType = Context.getPointerType(QType); From = ImpCastExprToType(From, QType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = QType; FromRecordType = QRecordType; // If the qualifier type was the same as the destination type, // we're done. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; } } CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, DestRecordType, FromLoc, FromRange, &BasePath, /*IgnoreAccess=*/true)) return ExprError(); return ImpCastExprToType(From, DestType, CK_UncheckedDerivedToBase, VK, &BasePath); } bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, const LookupResult &R, bool HasTrailingLParen) { // Only when used directly as the postfix-expression of a call. if (!HasTrailingLParen) return false; // Never if a scope specifier was provided. if (SS.isSet()) return false; // Only in C++ or ObjC++. if (!getLangOpts().CPlusPlus) return false; // Turn off ADL when we find certain kinds of declarations during // normal lookup: for (NamedDecl *D : R) { // C++0x [basic.lookup.argdep]p3: // -- a declaration of a class member // Since using decls preserve this property, we check this on the // original decl. if (D->isCXXClassMember()) return false; // C++0x [basic.lookup.argdep]p3: // -- a block-scope function declaration that is not a // using-declaration // NOTE: we also trigger this for function templates (in fact, we // don't check the decl type at all, since all other decl types // turn off ADL anyway). if (isa(D)) D = cast(D)->getTargetDecl(); else if (D->getLexicalDeclContext()->isFunctionOrMethod()) return false; // C++0x [basic.lookup.argdep]p3: // -- a declaration that is neither a function or a function // template // And also for builtin functions. if (isa(D)) { FunctionDecl *FDecl = cast(D); // But also builtin functions. if (FDecl->getBuiltinID() && FDecl->isImplicit()) return false; } else if (!isa(D)) return false; } return true; } /// Diagnoses obvious problems with the use of the given declaration /// as an expression. This is only actually called for lookups that /// were not overloaded, and it doesn't promise that the declaration /// will in fact be used. 
static bool CheckDeclInExpr(Sema &S, SourceLocation Loc, NamedDecl *D) { if (D->isInvalidDecl()) return true; if (isa(D)) { S.Diag(Loc, diag::err_unexpected_typedef) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_interface) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_namespace) << D->getDeclName(); return true; } return false; } // Certain multiversion types should be treated as overloaded even when there is // only one result. static bool ShouldLookupResultBeMultiVersionOverload(const LookupResult &R) { assert(R.isSingleResult() && "Expected only a single result"); const auto *FD = dyn_cast(R.getFoundDecl()); return FD && (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion()); } ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, bool AcceptInvalidDecl) { // If this is a single, fully-resolved result and we don't need ADL, // just build an ordinary singleton decl ref. if (!NeedsADL && R.isSingleResult() && !R.getAsSingle() && !ShouldLookupResultBeMultiVersionOverload(R)) return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(), R.getRepresentativeDecl(), nullptr, AcceptInvalidDecl); // We only need to check the declaration if there's exactly one // result, because in the overloaded case the results can only be // functions and function templates. if (R.isSingleResult() && !ShouldLookupResultBeMultiVersionOverload(R) && CheckDeclInExpr(*this, R.getNameLoc(), R.getFoundDecl())) return ExprError(); // Otherwise, just build an unresolved lookup expression. Suppress // any lookup-related diagnostics; we'll hash these out later, when // we've picked a target. R.suppressDiagnostics(); UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create(Context, R.getNamingClass(), SS.getWithLocInContext(Context), R.getLookupNameInfo(), NeedsADL, R.isOverloadedResult(), R.begin(), R.end()); return ULE; } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, ValueDecl *var); /// Complete semantic analysis for a reference to the given declaration. ExprResult Sema::BuildDeclarationNameExpr( const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs, bool AcceptInvalidDecl) { assert(D && "Cannot refer to a NULL declaration"); assert(!isa(D) && "Cannot refer unambiguously to a function template"); SourceLocation Loc = NameInfo.getLoc(); if (CheckDeclInExpr(*this, Loc, D)) { // Recovery from invalid cases (e.g. D is an invalid Decl). // We use the dependent type for the RecoveryExpr to prevent bogus follow-up // diagnostics, as invalid decls use int as a fallback type. return CreateRecoveryExpr(NameInfo.getBeginLoc(), NameInfo.getEndLoc(), {}); } if (TemplateDecl *Template = dyn_cast(D)) { // Specifically diagnose references to class templates that are missing // a template argument list. diagnoseMissingTemplateArguments(TemplateName(Template), Loc); return ExprError(); } // Make sure that we're referring to a value. if (!isa(D)) { Diag(Loc, diag::err_ref_non_value) << D << SS.getRange(); Diag(D->getLocation(), diag::note_declared_at); return ExprError(); } // Check whether this declaration can be used. Note that we suppress // this check when we're going to perform argument-dependent lookup // on this function name, because this might not be the function // that overload resolution actually selects. 
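  // A sketch of why that check is deferred when ADL will run (hypothetical
  // names): the declaration visible now need not be the one selected later.
  //
  //   [[deprecated]] void f(long);
  //   namespace N { struct S {}; void f(S); }
  //   void g(N::S V) { f(V); }   // unqualified lookup sees the deprecated
  //                              //   f(long), but ADL + overload resolution
  //                              //   pick N::f(S); warning now would be wrong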
if (DiagnoseUseOfDecl(D, Loc)) return ExprError(); auto *VD = cast(D); // Only create DeclRefExpr's for valid Decl's. if (VD->isInvalidDecl() && !AcceptInvalidDecl) return ExprError(); // Handle members of anonymous structs and unions. If we got here, // and the reference is to a class member indirect field, then this // must be the subject of a pointer-to-member expression. if (IndirectFieldDecl *indirectField = dyn_cast(VD)) if (!indirectField->isCXXClassMember()) return BuildAnonymousStructUnionMemberReference(SS, NameInfo.getLoc(), indirectField); QualType type = VD->getType(); if (type.isNull()) return ExprError(); ExprValueKind valueKind = VK_PRValue; // In 'T ...V;', the type of the declaration 'V' is 'T...', but the type of // a reference to 'V' is simply (unexpanded) 'T'. The type, like the value, // is expanded by some outer '...' in the context of the use. type = type.getNonPackExpansionType(); switch (D->getKind()) { // Ignore all the non-ValueDecl kinds. #define ABSTRACT_DECL(kind) #define VALUE(type, base) #define DECL(type, base) case Decl::type: #include "clang/AST/DeclNodes.inc" llvm_unreachable("invalid value decl kind"); // These shouldn't make it here. case Decl::ObjCAtDefsField: llvm_unreachable("forming non-member reference to ivar?"); // Enum constants are always r-values and never references. // Unresolved using declarations are dependent. case Decl::EnumConstant: case Decl::UnresolvedUsingValue: case Decl::OMPDeclareReduction: case Decl::OMPDeclareMapper: valueKind = VK_PRValue; break; // Fields and indirect fields that got here must be for // pointer-to-member expressions; we just call them l-values for // internal consistency, because this subexpression doesn't really // exist in the high-level semantics. case Decl::Field: case Decl::IndirectField: case Decl::ObjCIvar: assert(getLangOpts().CPlusPlus && "building reference to field in C?"); // These can't have reference type in well-formed programs, but // for internal consistency we do this anyway. type = type.getNonReferenceType(); valueKind = VK_LValue; break; // Non-type template parameters are either l-values or r-values // depending on the type. case Decl::NonTypeTemplateParm: { if (const ReferenceType *reftype = type->getAs()) { type = reftype->getPointeeType(); valueKind = VK_LValue; // even if the parameter is an r-value reference break; } // [expr.prim.id.unqual]p2: // If the entity is a template parameter object for a template // parameter of type T, the type of the expression is const T. // [...] The expression is an lvalue if the entity is a [...] template // parameter object. if (type->isRecordType()) { type = type.getUnqualifiedType().withConst(); valueKind = VK_LValue; break; } // For non-references, we need to strip qualifiers just in case // the template parameter was declared as 'const int' or whatever. valueKind = VK_PRValue; type = type.getUnqualifiedType(); break; } case Decl::Var: case Decl::VarTemplateSpecialization: case Decl::VarTemplatePartialSpecialization: case Decl::Decomposition: case Decl::OMPCapturedExpr: // In C, "extern void blah;" is valid and is an r-value. if (!getLangOpts().CPlusPlus && !type.hasQualifiers() && type->isVoidType()) { valueKind = VK_PRValue; break; } LLVM_FALLTHROUGH; case Decl::ImplicitParam: case Decl::ParmVar: { // These are always l-values. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Does the addition of const really only apply in // potentially-evaluated contexts? 
Since the variable isn't actually // captured in an unevaluated context, it seems that the answer is no. if (!isUnevaluatedContext()) { QualType CapturedType = getCapturedDeclRefType(cast(VD), Loc); if (!CapturedType.isNull()) type = CapturedType; } break; } case Decl::Binding: { // These are always lvalues. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Support lambda-capture of BindingDecls, once CWG actually // decides how that's supposed to work. auto *BD = cast(VD); if (BD->getDeclContext() != CurContext) { auto *DD = dyn_cast_or_null(BD->getDecomposedDecl()); if (DD && DD->hasLocalStorage()) diagnoseUncapturableValueReference(*this, Loc, BD); } break; } case Decl::Function: { if (unsigned BID = cast(VD)->getBuiltinID()) { if (!Context.BuiltinInfo.isDirectlyAddressable(BID)) { type = Context.BuiltinFnTy; valueKind = VK_PRValue; break; } } const FunctionType *fty = type->castAs(); // If we're referring to a function with an __unknown_anytype // result type, make the entire expression __unknown_anytype. if (fty->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_PRValue; break; } // Functions are l-values in C++. if (getLangOpts().CPlusPlus) { valueKind = VK_LValue; break; } // C99 DR 316 says that, if a function type comes from a // function definition (without a prototype), that type is only // used for checking compatibility. Therefore, when referencing // the function, we pretend that we don't have the full function // type. if (!cast(VD)->hasPrototype() && isa(fty)) type = Context.getFunctionNoProtoType(fty->getReturnType(), fty->getExtInfo()); // Functions are r-values in C. valueKind = VK_PRValue; break; } case Decl::CXXDeductionGuide: llvm_unreachable("building reference to deduction guide"); case Decl::MSProperty: case Decl::MSGuid: case Decl::TemplateParamObject: // FIXME: Should MSGuidDecl and template parameter objects be subject to // capture in OpenMP, or duplicated between host and device? valueKind = VK_LValue; break; case Decl::UnnamedGlobalConstant: valueKind = VK_LValue; break; case Decl::CXXMethod: // If we're referring to a method with an __unknown_anytype // result type, make the entire expression __unknown_anytype. // This should only be possible with a type written directly. if (const FunctionProtoType *proto = dyn_cast(VD->getType())) if (proto->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_PRValue; break; } // C++ methods are l-values if static, r-values if non-static. if (cast(VD)->isStatic()) { valueKind = VK_LValue; break; } LLVM_FALLTHROUGH; case Decl::CXXConversion: case Decl::CXXDestructor: case Decl::CXXConstructor: valueKind = VK_PRValue; break; } return BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD, /*FIXME: TemplateKWLoc*/ SourceLocation(), TemplateArgs); } static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, SmallString<32> &Target) { Target.resize(CharByteWidth * (Source.size() + 1)); char *ResultPtr = &Target[0]; const llvm::UTF8 *ErrorPtr; bool success = llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); (void)success; assert(success); Target.resize(ResultPtr - &Target[0]); } ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc, PredefinedExpr::IdentKind IK) { // Pick the current block, lambda, captured statement or function. 
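  // For reference (illustrative), the predefined identifiers handled here act
  // like implicitly declared character arrays naming the enclosing entity:
  //
  //   void Copy() {
  //     const char *Name = __func__;   // behaves as if 'static const char
  //   }                                //   __func__[5] = "Copy";' were declared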
Decl *currentDecl = nullptr; if (const BlockScopeInfo *BSI = getCurBlock()) currentDecl = BSI->TheDecl; else if (const LambdaScopeInfo *LSI = getCurLambda()) currentDecl = LSI->CallOperator; else if (const CapturedRegionScopeInfo *CSI = getCurCapturedRegion()) currentDecl = CSI->TheCapturedDecl; else currentDecl = getCurFunctionOrMethodDecl(); if (!currentDecl) { Diag(Loc, diag::ext_predef_outside_function); currentDecl = Context.getTranslationUnitDecl(); } QualType ResTy; StringLiteral *SL = nullptr; if (cast(currentDecl)->isDependentContext()) ResTy = Context.DependentTy; else { // Pre-defined identifiers are of type char[x], where x is the length of // the string. auto Str = PredefinedExpr::ComputeName(IK, currentDecl); unsigned Length = Str.length(); llvm::APInt LengthI(32, Length + 1); if (IK == PredefinedExpr::LFunction || IK == PredefinedExpr::LFuncSig) { ResTy = Context.adjustStringLiteralBaseType(Context.WideCharTy.withConst()); SmallString<32> RawChars; ConvertUTF8ToWideString(Context.getTypeSizeInChars(ResTy).getQuantity(), Str, RawChars); ResTy = Context.getConstantArrayType(ResTy, LengthI, nullptr, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, RawChars, StringLiteral::Wide, /*Pascal*/ false, ResTy, Loc); } else { ResTy = Context.adjustStringLiteralBaseType(Context.CharTy.withConst()); ResTy = Context.getConstantArrayType(ResTy, LengthI, nullptr, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, Str, StringLiteral::Ordinary, /*Pascal*/ false, ResTy, Loc); } } return PredefinedExpr::Create(Context, Loc, ResTy, IK, SL); } ExprResult Sema::BuildSYCLUniqueStableNameExpr(SourceLocation OpLoc, SourceLocation LParen, SourceLocation RParen, TypeSourceInfo *TSI) { return SYCLUniqueStableNameExpr::Create(Context, OpLoc, LParen, RParen, TSI); } ExprResult Sema::ActOnSYCLUniqueStableNameExpr(SourceLocation OpLoc, SourceLocation LParen, SourceLocation RParen, ParsedType ParsedTy) { TypeSourceInfo *TSI = nullptr; QualType Ty = GetTypeFromParser(ParsedTy, &TSI); if (Ty.isNull()) return ExprError(); if (!TSI) TSI = Context.getTrivialTypeSourceInfo(Ty, LParen); return BuildSYCLUniqueStableNameExpr(OpLoc, LParen, RParen, TSI); } ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) { PredefinedExpr::IdentKind IK; switch (Kind) { default: llvm_unreachable("Unknown simple primary expr!"); case tok::kw___func__: IK = PredefinedExpr::Func; break; // [C99 6.4.2.2] case tok::kw___FUNCTION__: IK = PredefinedExpr::Function; break; case tok::kw___FUNCDNAME__: IK = PredefinedExpr::FuncDName; break; // [MS] case tok::kw___FUNCSIG__: IK = PredefinedExpr::FuncSig; break; // [MS] case tok::kw_L__FUNCTION__: IK = PredefinedExpr::LFunction; break; // [MS] case tok::kw_L__FUNCSIG__: IK = PredefinedExpr::LFuncSig; break; // [MS] case tok::kw___PRETTY_FUNCTION__: IK = PredefinedExpr::PrettyFunction; break; } return BuildPredefinedExpr(Loc, IK); } ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { SmallString<16> CharBuffer; bool Invalid = false; StringRef ThisTok = PP.getSpelling(Tok, CharBuffer, &Invalid); if (Invalid) return ExprError(); CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(), PP, Tok.getKind()); if (Literal.hadError()) return ExprError(); QualType Ty; if (Literal.isWide()) Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++. 
else if (Literal.isUTF8() && getLangOpts().C2x) Ty = Context.UnsignedCharTy; // u8'x' -> unsigned char in C2x else if (Literal.isUTF8() && getLangOpts().Char8) Ty = Context.Char8Ty; // u8'x' -> char8_t when it exists. else if (Literal.isUTF16()) Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11. else if (Literal.isUTF32()) Ty = Context.Char32Ty; // U'x' -> char32_t in C11 and C++11. else if (!getLangOpts().CPlusPlus || Literal.isMultiChar()) Ty = Context.IntTy; // 'x' -> int in C, 'wxyz' -> int in C++. else Ty = Context.CharTy; // 'x' -> char in C++; // u8'x' -> char in C11-C17 and in C++ without char8_t. CharacterLiteral::CharacterKind Kind = CharacterLiteral::Ascii; if (Literal.isWide()) Kind = CharacterLiteral::Wide; else if (Literal.isUTF16()) Kind = CharacterLiteral::UTF16; else if (Literal.isUTF32()) Kind = CharacterLiteral::UTF32; else if (Literal.isUTF8()) Kind = CharacterLiteral::UTF8; Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty, Tok.getLocation()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_character_udl)); // C++11 [lex.ext]p6: The literal L is treated as a call of the form // operator "" X (ch) return BuildCookedLiteralOperatorCall(*this, UDLScope, UDSuffix, UDSuffixLoc, Lit, Tok.getLocation()); } ExprResult Sema::ActOnIntegerConstant(SourceLocation Loc, uint64_t Val) { unsigned IntSize = Context.getTargetInfo().getIntWidth(); return IntegerLiteral::Create(Context, llvm::APInt(IntSize, Val), Context.IntTy, Loc); } static Expr *BuildFloatingLiteral(Sema &S, NumericLiteralParser &Literal, QualType Ty, SourceLocation Loc) { const llvm::fltSemantics &Format = S.Context.getFloatTypeSemantics(Ty); using llvm::APFloat; APFloat Val(Format); APFloat::opStatus result = Literal.GetFloatValue(Val); // Overflow is always an error, but underflow is only an error if // we underflowed to zero (APFloat reports denormals as underflow). 
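  // For example (illustrative, IEEE single precision):
  //
  //   float Big    = 1.0e50f;    // overflows 'float': diagnosed
  //   float Zeroed = 1.0e-50f;   // underflows all the way to zero: diagnosed
  //   float Denorm = 1.0e-40f;   // underflows to a denormal: left alone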
if ((result & APFloat::opOverflow) || ((result & APFloat::opUnderflow) && Val.isZero())) { unsigned diagnostic; SmallString<20> buffer; if (result & APFloat::opOverflow) { diagnostic = diag::warn_float_overflow; APFloat::getLargest(Format).toString(buffer); } else { diagnostic = diag::warn_float_underflow; APFloat::getSmallest(Format).toString(buffer); } S.Diag(Loc, diagnostic) << Ty << StringRef(buffer.data(), buffer.size()); } bool isExact = (result == APFloat::opOK); return FloatingLiteral::Create(S.Context, Val, isExact, Ty, Loc); } bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc) { assert(E && "Invalid expression"); if (E->isValueDependent()) return false; QualType QT = E->getType(); if (!QT->isIntegerType() || QT->isBooleanType() || QT->isCharType()) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_type) << QT; return true; } llvm::APSInt ValueAPS; ExprResult R = VerifyIntegerConstantExpression(E, &ValueAPS); if (R.isInvalid()) return true; bool ValueIsPositive = ValueAPS.isStrictlyPositive(); if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value) << toString(ValueAPS, 10) << ValueIsPositive; return true; } return false; } ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { // Fast path for a single digit (which is quite common). A single digit // cannot have a trigraph, escaped newline, radix prefix, or suffix. if (Tok.getLength() == 1) { const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok); return ActOnIntegerConstant(Tok.getLocation(), Val-'0'); } SmallString<128> SpellingBuffer; // NumericLiteralParser wants to overread by one character. Add padding to // the buffer in case the token is copied to the buffer. If getSpelling() // returns a StringRef to the memory buffer, it should have a null char at // the EOF, so it is also safe. SpellingBuffer.resize(Tok.getLength() + 1); // Get the spelling of the token, which eliminates trigraphs, etc. bool Invalid = false; StringRef TokSpelling = PP.getSpelling(Tok, SpellingBuffer, &Invalid); if (Invalid) return ExprError(); NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), PP.getSourceManager(), PP.getLangOpts(), PP.getTargetInfo(), PP.getDiagnostics()); if (Literal.hadError) return ExprError(); if (Literal.hasUDSuffix()) { // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_numeric_udl)); QualType CookedTy; if (Literal.isFloatingLiteral()) { // C++11 [lex.ext]p4: If S contains a literal operator with parameter type // long double, the literal is treated as a call of the form // operator "" X (f L) CookedTy = Context.LongDoubleTy; } else { // C++11 [lex.ext]p3: If S contains a literal operator with parameter type // unsigned long long, the literal is treated as a call of the form // operator "" X (n ULL) CookedTy = Context.UnsignedLongLongTy; } DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); SourceLocation TokLoc = Tok.getLocation(); // Perform literal operator lookup to determine if we're building a raw // literal or a cooked one. 
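  // Illustrative declarations (the suffix '_m' and type 'Meters' are
  // hypothetical) of the two forms this lookup distinguishes:
  //
  //   Meters operator"" _m(long double);    // cooked: 1.5_m -> operator"" _m(1.5L)
  //   Meters operator"" _m(const char *);   // raw:    1.5_m -> operator"" _m("1.5")
  //                                         //   (used when no cooked form exists)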
LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, CookedTy, /*AllowRaw*/ true, /*AllowTemplate*/ true, /*AllowStringTemplatePack*/ false, /*DiagnoseMissing*/ !Literal.isImaginary)) { case LOLR_ErrorNoDiagnostic: // Lookup failure for imaginary constants isn't fatal, there's still the // GNU extension producing _Complex types. break; case LOLR_Error: return ExprError(); case LOLR_Cooked: { Expr *Lit; if (Literal.isFloatingLiteral()) { Lit = BuildFloatingLiteral(*this, Literal, CookedTy, Tok.getLocation()); } else { llvm::APInt ResultVal(Context.getTargetInfo().getLongLongWidth(), 0); if (Literal.GetIntegerValue(ResultVal)) Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Lit = IntegerLiteral::Create(Context, ResultVal, CookedTy, Tok.getLocation()); } return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc); } case LOLR_Raw: { // C++11 [lit.ext]p3, p4: If S contains a raw literal operator, the // literal is treated as a call of the form // operator "" X ("n") unsigned Length = Literal.getUDSuffixOffset(); QualType StrTy = Context.getConstantArrayType( Context.adjustStringLiteralBaseType(Context.CharTy.withConst()), llvm::APInt(32, Length + 1), nullptr, ArrayType::Normal, 0); Expr *Lit = StringLiteral::Create(Context, StringRef(TokSpelling.data(), Length), StringLiteral::Ordinary, /*Pascal*/ false, StrTy, &TokLoc, 1); return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc); } case LOLR_Template: { // C++11 [lit.ext]p3, p4: Otherwise (S contains a literal operator // template), L is treated as a call fo the form // operator "" X <'c1', 'c2', ... 'ck'>() // where n is the source character sequence c1 c2 ... ck. TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(Context.CharTy); bool CharIsUnsigned = Context.CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); for (unsigned I = 0, N = Literal.getUDSuffixOffset(); I != N; ++I) { Value = TokSpelling[I]; TemplateArgument Arg(Context, Value, Context.CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, TokLoc, &ExplicitArgs); } case LOLR_StringTemplatePack: llvm_unreachable("unexpected literal operator lookup result"); } } Expr *Res; if (Literal.isFixedPointLiteral()) { QualType Ty; if (Literal.isAccum) { if (Literal.isHalf) { Ty = Context.ShortAccumTy; } else if (Literal.isLong) { Ty = Context.LongAccumTy; } else { Ty = Context.AccumTy; } } else if (Literal.isFract) { if (Literal.isHalf) { Ty = Context.ShortFractTy; } else if (Literal.isLong) { Ty = Context.LongFractTy; } else { Ty = Context.FractTy; } } if (Literal.isUnsigned) Ty = Context.getCorrespondingUnsignedType(Ty); bool isSigned = !Literal.isUnsigned; unsigned scale = Context.getFixedPointScale(Ty); unsigned bit_width = Context.getTypeInfo(Ty).Width; llvm::APInt Val(bit_width, 0, isSigned); bool Overflowed = Literal.GetFixedPointValue(Val, scale); bool ValIsZero = Val.isZero() && !Overflowed; auto MaxVal = Context.getFixedPointMax(Ty).getValue(); if (Literal.isFract && Val == MaxVal + 1 && !ValIsZero) // Clause 6.4.4 - The value of a constant shall be in the range of // representable values for its type, with exception for constants of a // fract type with a value of exactly 1; such a constant shall denote // the maximal value for the type. 
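      // Hedged example: 1.0r names the value one past the largest
      // representable _Fract, so it is clamped to the maximum below instead
      // of being diagnosed; 1.5r would still be rejected as too large.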
--Val; else if (Val.ugt(MaxVal) || Overflowed) Diag(Tok.getLocation(), diag::err_too_large_for_fixed_point); Res = FixedPointLiteral::CreateFromRawInt(Context, Val, Ty, Tok.getLocation(), scale); } else if (Literal.isFloatingLiteral()) { QualType Ty; if (Literal.isHalf){ if (getOpenCLOptions().isAvailableOption("cl_khr_fp16", getLangOpts())) Ty = Context.HalfTy; else { Diag(Tok.getLocation(), diag::err_half_const_requires_fp16); return ExprError(); } } else if (Literal.isFloat) Ty = Context.FloatTy; else if (Literal.isLong) Ty = Context.LongDoubleTy; else if (Literal.isFloat16) Ty = Context.Float16Ty; else if (Literal.isFloat128) Ty = Context.Float128Ty; else Ty = Context.DoubleTy; Res = BuildFloatingLiteral(*this, Literal, Ty, Tok.getLocation()); if (Ty == Context.DoubleTy) { if (getLangOpts().SinglePrecisionConstants) { if (Ty->castAs()->getKind() != BuiltinType::Float) { Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } else if (getLangOpts().OpenCL && !getOpenCLOptions().isAvailableOption( "cl_khr_fp64", getLangOpts())) { // Impose single-precision float type when cl_khr_fp64 is not enabled. Diag(Tok.getLocation(), diag::warn_double_const_requires_fp64) << (getLangOpts().getOpenCLCompatibleVersion() >= 300); Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } } else if (!Literal.isIntegerLiteral()) { return ExprError(); } else { QualType Ty; // 'long long' is a C99 or C++11 feature. if (!getLangOpts().C99 && Literal.isLongLong) { if (getLangOpts().CPlusPlus) Diag(Tok.getLocation(), getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong); else Diag(Tok.getLocation(), diag::ext_c99_longlong); } // 'z/uz' literals are a C++2b feature. if (Literal.isSizeT) Diag(Tok.getLocation(), getLangOpts().CPlusPlus ? getLangOpts().CPlusPlus2b ? diag::warn_cxx20_compat_size_t_suffix : diag::ext_cxx2b_size_t_suffix : diag::err_cxx2b_size_t_suffix); // 'wb/uwb' literals are a C2x feature. We support _BitInt as a type in C++, // but we do not currently support the suffix in C++ mode because it's not // entirely clear whether WG21 will prefer this suffix to return a library // type such as std::bit_int instead of returning a _BitInt. if (Literal.isBitInt && !getLangOpts().CPlusPlus) PP.Diag(Tok.getLocation(), getLangOpts().C2x ? diag::warn_c2x_compat_bitint_suffix : diag::ext_c2x_bitint_suffix); // Get the value in the widest-possible width. What is "widest" depends on // whether the literal is a bit-precise integer or not. For a bit-precise // integer type, try to scan the source to determine how many bits are // needed to represent the value. This may seem a bit expensive, but trying // to get the integer value from an overly-wide APInt is *extremely* // expensive, so the naive approach of assuming // llvm::IntegerType::MAX_INT_BITS is a big performance hit. unsigned BitsNeeded = Literal.isBitInt ? llvm::APInt::getSufficientBitsNeeded( Literal.getLiteralDigits(), Literal.getRadix()) : Context.getTargetInfo().getIntMaxTWidth(); llvm::APInt ResultVal(BitsNeeded, 0); if (Literal.GetIntegerValue(ResultVal)) { // If this value didn't fit into uintmax_t, error and force to ull. Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Ty = Context.UnsignedLongLongTy; assert(Context.getTypeSize(Ty) == ResultVal.getBitWidth() && "long long is not intmax_t?"); } else { // If this value fits into a ULL, try to figure out what else it fits into // according to the rules of C99 6.4.4.1p5. 
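      // Hedged worked example of that ladder (assuming 32-bit int and
      // 64-bit long):
      //   2147483648  (decimal, no suffix)  -> long          (skips unsigned)
      //   0x80000000  (hexadecimal)         -> unsigned int
      //   4294967296u (explicit U suffix)   -> unsigned long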
// Octal, Hexadecimal, and integers with a U suffix are allowed to // be an unsigned int. bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10; // Check from smallest to largest, picking the smallest type we can. unsigned Width = 0; // Microsoft specific integer suffixes are explicitly sized. if (Literal.MicrosoftInteger) { if (Literal.MicrosoftInteger == 8 && !Literal.isUnsigned) { Width = 8; Ty = Context.CharTy; } else { Width = Literal.MicrosoftInteger; Ty = Context.getIntTypeForBitwidth(Width, /*Signed=*/!Literal.isUnsigned); } } // Bit-precise integer literals are automagically-sized based on the // width required by the literal. if (Literal.isBitInt) { // The signed version has one more bit for the sign value. There are no // zero-width bit-precise integers, even if the literal value is 0. Width = std::max(ResultVal.getActiveBits(), 1u) + (Literal.isUnsigned ? 0u : 1u); // Diagnose if the width of the constant is larger than BITINT_MAXWIDTH, // and reset the type to the largest supported width. unsigned int MaxBitIntWidth = Context.getTargetInfo().getMaxBitIntWidth(); if (Width > MaxBitIntWidth) { Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << Literal.isUnsigned; Width = MaxBitIntWidth; } // Reset the result value to the smaller APInt and select the correct // type to be used. Note, we zext even for signed values because the // literal itself is always an unsigned value (a preceeding - is a // unary operator, not part of the literal). ResultVal = ResultVal.zextOrTrunc(Width); Ty = Context.getBitIntType(Literal.isUnsigned, Width); } // Check C++2b size_t literals. if (Literal.isSizeT) { assert(!Literal.MicrosoftInteger && "size_t literals can't be Microsoft literals"); unsigned SizeTSize = Context.getTargetInfo().getTypeWidth( Context.getTargetInfo().getSizeType()); // Does it fit in size_t? if (ResultVal.isIntN(SizeTSize)) { // Does it fit in ssize_t? if (!Literal.isUnsigned && ResultVal[SizeTSize - 1] == 0) Ty = Context.getSignedSizeType(); else if (AllowUnsigned) Ty = Context.getSizeType(); Width = SizeTSize; } } if (Ty.isNull() && !Literal.isLong && !Literal.isLongLong && !Literal.isSizeT) { // Are int/unsigned possibilities? unsigned IntSize = Context.getTargetInfo().getIntWidth(); // Does it fit in a unsigned int? if (ResultVal.isIntN(IntSize)) { // Does it fit in a signed int? if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0) Ty = Context.IntTy; else if (AllowUnsigned) Ty = Context.UnsignedIntTy; Width = IntSize; } } // Are long/unsigned long possibilities? if (Ty.isNull() && !Literal.isLongLong && !Literal.isSizeT) { unsigned LongSize = Context.getTargetInfo().getLongWidth(); // Does it fit in a unsigned long? if (ResultVal.isIntN(LongSize)) { // Does it fit in a signed long? if (!Literal.isUnsigned && ResultVal[LongSize-1] == 0) Ty = Context.LongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongTy; // Check according to the rules of C90 6.1.3.2p5. C++03 [lex.icon]p2 // is compatible. else if (!getLangOpts().C99 && !getLangOpts().CPlusPlus11) { const unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); Diag(Tok.getLocation(), getLangOpts().CPlusPlus ? Literal.isLong ? diag::warn_old_implicitly_unsigned_long_cxx : /*C++98 UB*/ diag:: ext_old_implicitly_unsigned_long_cxx : diag::warn_old_implicitly_unsigned_long) << (LongLongSize > LongSize ? /*will have type 'long long'*/ 0 : /*will be ill-formed*/ 1); Ty = Context.UnsignedLongTy; } Width = LongSize; } } // Check long long if needed. 
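      // Hedged example for the remaining rungs: with 32-bit longs,
      // 5000000000 is typed below as long long, while a decimal literal such
      // as 10000000000000000000 only fits when unsigned and so reaches the
      // Ty.isNull() handling and is diagnosed but given unsigned long long.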
if (Ty.isNull() && !Literal.isSizeT) { unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); // Does it fit in a unsigned long long? if (ResultVal.isIntN(LongLongSize)) { // Does it fit in a signed long long? // To be compatible with MSVC, hex integer literals ending with the // LL or i64 suffix are always signed in Microsoft mode. if (!Literal.isUnsigned && (ResultVal[LongLongSize-1] == 0 || (getLangOpts().MSVCCompat && Literal.isLongLong))) Ty = Context.LongLongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongLongTy; Width = LongLongSize; } } // If we still couldn't decide a type, we either have 'size_t' literal // that is out of range, or a decimal literal that does not fit in a // signed long long and has no U suffix. if (Ty.isNull()) { if (Literal.isSizeT) Diag(Tok.getLocation(), diag::err_size_t_literal_too_large) << Literal.isUnsigned; else Diag(Tok.getLocation(), diag::ext_integer_literal_too_large_for_signed); Ty = Context.UnsignedLongLongTy; Width = Context.getTargetInfo().getLongLongWidth(); } if (ResultVal.getBitWidth() != Width) ResultVal = ResultVal.trunc(Width); } Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation()); } // If this is an imaginary literal, create the ImaginaryLiteral wrapper. if (Literal.isImaginary) { Res = new (Context) ImaginaryLiteral(Res, Context.getComplexType(Res->getType())); Diag(Tok.getLocation(), diag::ext_imaginary_constant); } return Res; } ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) { assert(E && "ActOnParenExpr() missing expr"); QualType ExprTy = E->getType(); if (getLangOpts().ProtectParens && CurFPFeatures.getAllowFPReassociate() && !E->isLValue() && ExprTy->hasFloatingRepresentation()) return BuildBuiltinCallExpr(R, Builtin::BI__arithmetic_fence, E); return new (Context) ParenExpr(L, R, E); } static bool CheckVecStepTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange) { // [OpenCL 1.1 6.11.12] "The vec_step built-in function takes a built-in // scalar or vector data type argument..." // Every built-in scalar type (OpenCL 1.1 6.1.1) is either an arithmetic // type (C99 6.2.5p18) or void. if (!(T->isArithmeticType() || T->isVoidType() || T->isVectorType())) { S.Diag(Loc, diag::err_vecstep_non_scalar_vector_type) << T << ArgRange; return true; } assert((T->isVoidType() || !T->isIncompleteType()) && "Scalar types should always be complete"); return false; } static bool CheckExtensionTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Invalid types must be hard errors for SFINAE in C++. if (S.LangOpts.CPlusPlus) return true; // C99 6.5.3.4p1: if (T->isFunctionType() && (TraitKind == UETT_SizeOf || TraitKind == UETT_AlignOf || TraitKind == UETT_PreferredAlignOf)) { // sizeof(function)/alignof(function) is allowed as an extension. S.Diag(Loc, diag::ext_sizeof_alignof_function_type) << getTraitSpelling(TraitKind) << ArgRange; return false; } // Allow sizeof(void)/alignof(void) as an extension, unless in OpenCL where // this is an error (OpenCL v1.1 s6.3.k) if (T->isVoidType()) { unsigned DiagID = S.LangOpts.OpenCL ? 
diag::err_opencl_sizeof_alignof_type : diag::ext_sizeof_alignof_void_type; S.Diag(Loc, DiagID) << getTraitSpelling(TraitKind) << ArgRange; return false; } return true; } static bool CheckObjCTraitOperandConstraints(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Reject sizeof(interface) and sizeof(interface) if the // runtime doesn't allow it. if (!S.LangOpts.ObjCRuntime.allowsSizeofAlignof() && T->isObjCObjectType()) { S.Diag(Loc, diag::err_sizeof_nonfragile_interface) << T << (TraitKind == UETT_SizeOf) << ArgRange; return true; } return false; } /// Check whether E is a pointer from a decayed array type (the decayed /// pointer type is equal to T) and emit a warning if it is. static void warnOnSizeofOnArrayDecay(Sema &S, SourceLocation Loc, QualType T, Expr *E) { // Don't warn if the operation changed the type. if (T != E->getType()) return; // Now look for array decays. ImplicitCastExpr *ICE = dyn_cast(E); if (!ICE || ICE->getCastKind() != CK_ArrayToPointerDecay) return; S.Diag(Loc, diag::warn_sizeof_array_decay) << ICE->getSourceRange() << ICE->getType() << ICE->getSubExpr()->getType(); } /// Check the constraints on expression operands to unary type expression /// and type traits. /// /// Completes any types necessary and validates the constraints on the operand /// expression. The logic mostly mirrors the type-based overload, but may modify /// the expression as it completes the type for that expression through template /// instantiation, etc. bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E, UnaryExprOrTypeTrait ExprKind) { QualType ExprTy = E->getType(); assert(!ExprTy->isReferenceType()); bool IsUnevaluatedOperand = (ExprKind == UETT_SizeOf || ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf || ExprKind == UETT_VecStep); if (IsUnevaluatedOperand) { ExprResult Result = CheckUnevaluatedOperand(E); if (Result.isInvalid()) return true; E = Result.get(); } // The operand for sizeof and alignof is in an unevaluated expression context, // so side effects could result in unintended consequences. // Exclude instantiation-dependent expressions, because 'sizeof' is sometimes // used to build SFINAE gadgets. // FIXME: Should we consider instantiation-dependent operands to 'alignof'? if (IsUnevaluatedOperand && !inTemplateInstantiation() && !E->isInstantiationDependent() && !E->getType()->isVariableArrayType() && E->HasSideEffects(Context, false)) Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprTy, E->getExprLoc(), E->getSourceRange()); // Explicitly list some types as extensions. if (!CheckExtensionTraitOperandType(*this, ExprTy, E->getExprLoc(), E->getSourceRange(), ExprKind)) return false; // 'alignof' applied to an expression only requires the base element type of // the expression to be complete. 'sizeof' requires the expression's type to // be complete (and will attempt to complete it if it's an array of unknown // bound). if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf) { if (RequireCompleteSizedType( E->getExprLoc(), Context.getBaseElementType(E->getType()), diag::err_sizeof_alignof_incomplete_or_sizeless_type, getTraitSpelling(ExprKind), E->getSourceRange())) return true; } else { if (RequireCompleteSizedExprType( E, diag::err_sizeof_alignof_incomplete_or_sizeless_type, getTraitSpelling(ExprKind), E->getSourceRange())) return true; } // Completing the expression's type may have changed it. 
ExprTy = E->getType(); assert(!ExprTy->isReferenceType()); if (ExprTy->isFunctionType()) { Diag(E->getExprLoc(), diag::err_sizeof_alignof_function_type) << getTraitSpelling(ExprKind) << E->getSourceRange(); return true; } if (CheckObjCTraitOperandConstraints(*this, ExprTy, E->getExprLoc(), E->getSourceRange(), ExprKind)) return true; if (ExprKind == UETT_SizeOf) { if (DeclRefExpr *DeclRef = dyn_cast(E->IgnoreParens())) { if (ParmVarDecl *PVD = dyn_cast(DeclRef->getFoundDecl())) { QualType OType = PVD->getOriginalType(); QualType Type = PVD->getType(); if (Type->isPointerType() && OType->isArrayType()) { Diag(E->getExprLoc(), diag::warn_sizeof_array_param) << Type << OType; Diag(PVD->getLocation(), diag::note_declared_at); } } } // Warn on "sizeof(array op x)" and "sizeof(x op array)", where the array // decays into a pointer and returns an unintended result. This is most // likely a typo for "sizeof(array) op x". if (BinaryOperator *BO = dyn_cast(E->IgnoreParens())) { warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(), BO->getLHS()); warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(), BO->getRHS()); } } return false; } /// Check the constraints on operands to unary expression and type /// traits. /// /// This will complete any types necessary, and validate the various constraints /// on those operands. /// /// The UsualUnaryConversions() function is *not* called by this routine. /// C99 6.3.2.1p[2-4] all state: /// Except when it is the operand of the sizeof operator ... /// /// C++ [expr.sizeof]p4 /// The lvalue-to-rvalue, array-to-pointer, and function-to-pointer /// standard conversions are not applied to the operand of sizeof. /// /// This policy is followed for all of the unary trait expressions. bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation OpLoc, SourceRange ExprRange, UnaryExprOrTypeTrait ExprKind) { if (ExprType->isDependentType()) return false; // C++ [expr.sizeof]p2: // When applied to a reference or a reference type, the result // is the size of the referenced type. // C++11 [expr.alignof]p3: // When alignof is applied to a reference type, the result // shall be the alignment of the referenced type. if (const ReferenceType *Ref = ExprType->getAs()) ExprType = Ref->getPointeeType(); // C11 6.5.3.4/3, C++11 [expr.alignof]p3: // When alignof or _Alignof is applied to an array type, the result // is the alignment of the element type. if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf || ExprKind == UETT_OpenMPRequiredSimdAlign) ExprType = Context.getBaseElementType(ExprType); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprType, OpLoc, ExprRange); // Explicitly list some types as extensions. if (!CheckExtensionTraitOperandType(*this, ExprType, OpLoc, ExprRange, ExprKind)) return false; if (RequireCompleteSizedType( OpLoc, ExprType, diag::err_sizeof_alignof_incomplete_or_sizeless_type, getTraitSpelling(ExprKind), ExprRange)) return true; if (ExprType->isFunctionType()) { Diag(OpLoc, diag::err_sizeof_alignof_function_type) << getTraitSpelling(ExprKind) << ExprRange; return true; } if (CheckObjCTraitOperandConstraints(*this, ExprType, OpLoc, ExprRange, ExprKind)) return true; return false; } static bool CheckAlignOfExpr(Sema &S, Expr *E, UnaryExprOrTypeTrait ExprKind) { // Cannot know anything else if the expression is dependent. 
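  // Hedged example: in a template, __alignof__(t.member) with a dependent
  // 't' cannot yet be classified (bit-field? member of an incomplete
  // class?), so all of the checks below wait until instantiation.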
if (E->isTypeDependent()) return false; if (E->getObjectKind() == OK_BitField) { S.Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 1 << E->getSourceRange(); return true; } ValueDecl *D = nullptr; Expr *Inner = E->IgnoreParens(); if (DeclRefExpr *DRE = dyn_cast(Inner)) { D = DRE->getDecl(); } else if (MemberExpr *ME = dyn_cast(Inner)) { D = ME->getMemberDecl(); } // If it's a field, require the containing struct to have a // complete definition so that we can compute the layout. // // This can happen in C++11 onwards, either by naming the member // in a way that is not transformed into a member access expression // (in an unevaluated operand, for instance), or by naming the member // in a trailing-return-type. // // For the record, since __alignof__ on expressions is a GCC // extension, GCC seems to permit this but always gives the // nonsensical answer 0. // // We don't really need the layout here --- we could instead just // directly check for all the appropriate alignment-lowing // attributes --- but that would require duplicating a lot of // logic that just isn't worth duplicating for such a marginal // use-case. if (FieldDecl *FD = dyn_cast_or_null(D)) { // Fast path this check, since we at least know the record has a // definition if we can find a member of it. if (!FD->getParent()->isCompleteDefinition()) { S.Diag(E->getExprLoc(), diag::err_alignof_member_of_incomplete_type) << E->getSourceRange(); return true; } // Otherwise, if it's a field, and the field doesn't have // reference type, then it must have a complete type (or be a // flexible array member, which we explicitly want to // white-list anyway), which makes the following checks trivial. if (!FD->getType()->isReferenceType()) return false; } return S.CheckUnaryExprOrTypeTraitOperand(E, ExprKind); } bool Sema::CheckVecStepExpr(Expr *E) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. if (E->isTypeDependent()) return false; return CheckUnaryExprOrTypeTraitOperand(E, UETT_VecStep); } static void captureVariablyModifiedType(ASTContext &Context, QualType T, CapturingScopeInfo *CSI) { assert(T->isVariablyModifiedType()); assert(CSI != nullptr); // We're going to walk down into the type and look for VLA expressions. do { const Type *Ty = T.getTypePtr(); switch (Ty->getTypeClass()) { #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) #include "clang/AST/TypeNodes.inc" T = QualType(); break; // These types are never variably-modified. 
case Type::Builtin: case Type::Complex: case Type::Vector: case Type::ExtVector: case Type::ConstantMatrix: case Type::Record: case Type::Enum: case Type::Elaborated: case Type::TemplateSpecialization: case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::ObjCTypeParam: case Type::Pipe: case Type::BitInt: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: T = cast(Ty)->getOriginalType(); break; case Type::Decayed: T = cast(Ty)->getPointeeType(); break; case Type::Pointer: T = cast(Ty)->getPointeeType(); break; case Type::BlockPointer: T = cast(Ty)->getPointeeType(); break; case Type::LValueReference: case Type::RValueReference: T = cast(Ty)->getPointeeType(); break; case Type::MemberPointer: T = cast(Ty)->getPointeeType(); break; case Type::ConstantArray: case Type::IncompleteArray: // Losing element qualification here is fine. T = cast(Ty)->getElementType(); break; case Type::VariableArray: { // Losing element qualification here is fine. const VariableArrayType *VAT = cast(Ty); // Unknown size indication requires no size computation. // Otherwise, evaluate and record it. auto Size = VAT->getSizeExpr(); if (Size && !CSI->isVLATypeCaptured(VAT) && (isa(CSI) || isa(CSI))) CSI->addVLATypeCapture(Size->getExprLoc(), VAT, Context.getSizeType()); T = VAT->getElementType(); break; } case Type::FunctionProto: case Type::FunctionNoProto: T = cast(Ty)->getReturnType(); break; case Type::Paren: case Type::TypeOf: case Type::UnaryTransform: case Type::Attributed: case Type::BTFTagAttributed: case Type::SubstTemplateTypeParm: case Type::MacroQualified: // Keep walking after single level desugaring. T = T.getSingleStepDesugaredType(Context); break; case Type::Typedef: T = cast(Ty)->desugar(); break; case Type::Decltype: T = cast(Ty)->desugar(); break; case Type::Using: T = cast(Ty)->desugar(); break; case Type::Auto: case Type::DeducedTemplateSpecialization: T = cast(Ty)->getDeducedType(); break; case Type::TypeOfExpr: T = cast(Ty)->getUnderlyingExpr()->getType(); break; case Type::Atomic: T = cast(Ty)->getValueType(); break; } } while (!T.isNull() && T->isVariablyModifiedType()); } /// Build a sizeof or alignof expression given a type operand. ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(TypeSourceInfo *TInfo, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, SourceRange R) { if (!TInfo) return ExprError(); QualType T = TInfo->getType(); if (!T->isDependentType() && CheckUnaryExprOrTypeTraitOperand(T, OpLoc, R, ExprKind)) return ExprError(); if (T->isVariablyModifiedType() && FunctionScopes.size() > 1) { if (auto *TT = T->getAs()) { for (auto I = FunctionScopes.rbegin(), E = std::prev(FunctionScopes.rend()); I != E; ++I) { auto *CSI = dyn_cast(*I); if (CSI == nullptr) break; DeclContext *DC = nullptr; if (auto *LSI = dyn_cast(CSI)) DC = LSI->CallOperator; else if (auto *CRSI = dyn_cast(CSI)) DC = CRSI->TheCapturedDecl; else if (auto *BSI = dyn_cast(CSI)) DC = BSI->TheDecl; if (DC) { if (DC->containsDecl(TT->getDecl())) break; captureVariablyModifiedType(Context, T, CSI); } } } } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. if (isUnevaluatedContext() && ExprKind == UETT_SizeOf && TInfo->getType()->isVariablyModifiedType()) TInfo = TransformToPotentiallyEvaluated(TInfo); return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, TInfo, Context.getSizeType(), OpLoc, R.getEnd()); } /// Build a sizeof or alignof expression given an expression /// operand. 
ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(Expr *E, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind) { ExprResult PE = CheckPlaceholderExpr(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); // Verify that the operand is valid. bool isInvalid = false; if (E->isTypeDependent()) { // Delay type-checking for type-dependent expressions. } else if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf) { isInvalid = CheckAlignOfExpr(*this, E, ExprKind); } else if (ExprKind == UETT_VecStep) { isInvalid = CheckVecStepExpr(E); } else if (ExprKind == UETT_OpenMPRequiredSimdAlign) { Diag(E->getExprLoc(), diag::err_openmp_default_simd_align_expr); isInvalid = true; } else if (E->refersToBitField()) { // C99 6.5.3.4p1. Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 0; isInvalid = true; } else { isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_SizeOf); } if (isInvalid) return ExprError(); if (ExprKind == UETT_SizeOf && E->getType()->isVariableArrayType()) { PE = TransformToPotentiallyEvaluated(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, E, Context.getSizeType(), OpLoc, E->getSourceRange().getEnd()); } /// ActOnUnaryExprOrTypeTraitExpr - Handle @c sizeof(type) and @c sizeof @c /// expr and the same for @c alignof and @c __alignof /// Note that the ArgRange is invalid if isType is false. ExprResult Sema::ActOnUnaryExprOrTypeTraitExpr(SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, bool IsType, void *TyOrEx, SourceRange ArgRange) { // If error parsing type, ignore. if (!TyOrEx) return ExprError(); if (IsType) { TypeSourceInfo *TInfo; (void) GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrEx), &TInfo); return CreateUnaryExprOrTypeTraitExpr(TInfo, OpLoc, ExprKind, ArgRange); } Expr *ArgEx = (Expr *)TyOrEx; ExprResult Result = CreateUnaryExprOrTypeTraitExpr(ArgEx, OpLoc, ExprKind); return Result; } static QualType CheckRealImagOperand(Sema &S, ExprResult &V, SourceLocation Loc, bool IsReal) { if (V.get()->isTypeDependent()) return S.Context.DependentTy; // _Real and _Imag are only l-values for normal l-values. if (V.get()->getObjectKind() != OK_Ordinary) { V = S.DefaultLvalueConversion(V.get()); if (V.isInvalid()) return QualType(); } // These operators return the element type of a complex type. if (const ComplexType *CT = V.get()->getType()->getAs()) return CT->getElementType(); // Otherwise they pass through real integer and floating point types here. if (V.get()->getType()->isArithmeticType()) return V.get()->getType(); // Test for placeholders. ExprResult PR = S.CheckPlaceholderExpr(V.get()); if (PR.isInvalid()) return QualType(); if (PR.get() != V.get()) { V = PR; return CheckRealImagOperand(S, V, Loc, IsReal); } // Reject anything else. S.Diag(Loc, diag::err_realimag_invalid_type) << V.get()->getType() << (IsReal ? "__real" : "__imag"); return QualType(); } ExprResult Sema::ActOnPostfixUnaryOp(Scope *S, SourceLocation OpLoc, tok::TokenKind Kind, Expr *Input) { UnaryOperatorKind Opc; switch (Kind) { default: llvm_unreachable("Unknown unary op!"); case tok::plusplus: Opc = UO_PostInc; break; case tok::minusminus: Opc = UO_PostDec; break; } // Since this might is a postfix expression, get rid of ParenListExprs. 
ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Input); if (Result.isInvalid()) return ExprError(); Input = Result.get(); return BuildUnaryOp(S, OpLoc, Opc, Input); } /// Diagnose if arithmetic on the given ObjC pointer is illegal. /// /// \return true on error static bool checkArithmeticOnObjCPointer(Sema &S, SourceLocation opLoc, Expr *op) { assert(op->getType()->isObjCObjectPointerType()); if (S.LangOpts.ObjCRuntime.allowsPointerArithmetic() && !S.LangOpts.ObjCSubscriptingLegacyRuntime) return false; S.Diag(opLoc, diag::err_arithmetic_nonfragile_interface) << op->getType()->castAs()->getPointeeType() << op->getSourceRange(); return true; } static bool isMSPropertySubscriptExpr(Sema &S, Expr *Base) { auto *BaseNoParens = Base->IgnoreParens(); if (auto *MSProp = dyn_cast(BaseNoParens)) return MSProp->getPropertyDecl()->getType()->isArrayType(); return isa(BaseNoParens); } // Returns the type used for LHS[RHS], given one of LHS, RHS is type-dependent. // Typically this is DependentTy, but can sometimes be more precise. // // There are cases when we could determine a non-dependent type: // - LHS and RHS may have non-dependent types despite being type-dependent // (e.g. unbounded array static members of the current instantiation) // - one may be a dependent-sized array with known element type // - one may be a dependent-typed valid index (enum in current instantiation) // // We *always* return a dependent type, in such cases it is DependentTy. // This avoids creating type-dependent expressions with non-dependent types. // FIXME: is this important to avoid? See https://reviews.llvm.org/D107275 static QualType getDependentArraySubscriptType(Expr *LHS, Expr *RHS, const ASTContext &Ctx) { assert(LHS->isTypeDependent() || RHS->isTypeDependent()); QualType LTy = LHS->getType(), RTy = RHS->getType(); QualType Result = Ctx.DependentTy; if (RTy->isIntegralOrUnscopedEnumerationType()) { if (const PointerType *PT = LTy->getAs()) Result = PT->getPointeeType(); else if (const ArrayType *AT = LTy->getAsArrayTypeUnsafe()) Result = AT->getElementType(); } else if (LTy->isIntegralOrUnscopedEnumerationType()) { if (const PointerType *PT = RTy->getAs()) Result = PT->getPointeeType(); else if (const ArrayType *AT = RTy->getAsArrayTypeUnsafe()) Result = AT->getElementType(); } // Ensure we return a dependent type. return Result->isDependentType() ? Result : Ctx.DependentTy; } static bool checkArgsForPlaceholders(Sema &S, MultiExprArg args); ExprResult Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, MultiExprArg ArgExprs, SourceLocation rbLoc) { if (base && !base->getType().isNull() && base->hasPlaceholderType(BuiltinType::OMPArraySection)) return ActOnOMPArraySectionExpr(base, lbLoc, ArgExprs.front(), SourceLocation(), SourceLocation(), /*Length*/ nullptr, /*Stride=*/nullptr, rbLoc); // Since this might be a postfix expression, get rid of ParenListExprs. if (isa(base)) { ExprResult result = MaybeConvertParenListExprToParenExpr(S, base); if (result.isInvalid()) return ExprError(); base = result.get(); } // Check if base and idx form a MatrixSubscriptExpr. // // Helper to check for comma expressions, which are not allowed as indices for // matrix subscript expressions. auto CheckAndReportCommaError = [this, base, rbLoc](Expr *E) { if (isa(E) && cast(E)->isCommaOp()) { Diag(E->getExprLoc(), diag::err_matrix_subscript_comma) << SourceRange(base->getBeginLoc(), rbLoc); return true; } return false; }; // The matrix subscript operator ([][])is considered a single operator. 
// Separating the index expressions by parenthesis is not allowed. if (base->hasPlaceholderType(BuiltinType::IncompleteMatrixIdx) && !isa(base)) { Diag(base->getExprLoc(), diag::err_matrix_separate_incomplete_index) << SourceRange(base->getBeginLoc(), rbLoc); return ExprError(); } // If the base is a MatrixSubscriptExpr, try to create a new // MatrixSubscriptExpr. auto *matSubscriptE = dyn_cast(base); if (matSubscriptE) { assert(ArgExprs.size() == 1); if (CheckAndReportCommaError(ArgExprs.front())) return ExprError(); assert(matSubscriptE->isIncomplete() && "base has to be an incomplete matrix subscript"); return CreateBuiltinMatrixSubscriptExpr(matSubscriptE->getBase(), matSubscriptE->getRowIdx(), ArgExprs.front(), rbLoc); } // Handle any non-overload placeholder types in the base and index // expressions. We can't handle overloads here because the other // operand might be an overloadable type, in which case the overload // resolution for the operator overload should get the first crack // at the overload. bool IsMSPropertySubscript = false; if (base->getType()->isNonOverloadPlaceholderType()) { IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base); if (!IsMSPropertySubscript) { ExprResult result = CheckPlaceholderExpr(base); if (result.isInvalid()) return ExprError(); base = result.get(); } } // If the base is a matrix type, try to create a new MatrixSubscriptExpr. if (base->getType()->isMatrixType()) { assert(ArgExprs.size() == 1); if (CheckAndReportCommaError(ArgExprs.front())) return ExprError(); return CreateBuiltinMatrixSubscriptExpr(base, ArgExprs.front(), nullptr, rbLoc); } if (ArgExprs.size() == 1 && getLangOpts().CPlusPlus20) { Expr *idx = ArgExprs[0]; if ((isa(idx) && cast(idx)->isCommaOp()) || (isa(idx) && cast(idx)->getOperator() == OO_Comma)) { Diag(idx->getExprLoc(), diag::warn_deprecated_comma_subscript) << SourceRange(base->getBeginLoc(), rbLoc); } } if (ArgExprs.size() == 1 && ArgExprs[0]->getType()->isNonOverloadPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(ArgExprs[0]); if (result.isInvalid()) return ExprError(); ArgExprs[0] = result.get(); } else { if (checkArgsForPlaceholders(*this, ArgExprs)) return ExprError(); } // Build an unanalyzed expression if either operand is type-dependent. if (getLangOpts().CPlusPlus && ArgExprs.size() == 1 && (base->isTypeDependent() || Expr::hasAnyTypeDependentArguments(ArgExprs))) { return new (Context) ArraySubscriptExpr( base, ArgExprs.front(), getDependentArraySubscriptType(base, ArgExprs.front(), getASTContext()), VK_LValue, OK_Ordinary, rbLoc); } // MSDN, property (C++) // https://msdn.microsoft.com/en-us/library/yhfk0thd(v=vs.120).aspx // This attribute can also be used in the declaration of an empty array in a // class or structure definition. For example: // __declspec(property(get=GetX, put=PutX)) int x[]; // The above statement indicates that x[] can be used with one or more array // indices. In this case, i=p->x[a][b] will be turned into i=p->GetX(a, b), // and p->x[a][b] = i will be turned into p->PutX(a, b, i); if (IsMSPropertySubscript) { assert(ArgExprs.size() == 1); // Build MS property subscript expression if base is MS property reference // or MS property subscript. return new (Context) MSPropertySubscriptExpr(base, ArgExprs.front(), Context.PseudoObjectTy, VK_LValue, OK_Ordinary, rbLoc); } // Use C++ overloaded-operator rules if either operand has record // type. 
The spec says to do this if either type is *overloadable*, // but enum types can't declare subscript operators or conversion // operators, so there's nothing interesting for overload resolution // to do if there aren't any record types involved. // // ObjC pointers have their own subscripting logic that is not tied // to overload resolution and so should not take this path. if (getLangOpts().CPlusPlus && !base->getType()->isObjCObjectPointerType() && ((base->getType()->isRecordType() || (ArgExprs.size() != 1 || ArgExprs[0]->getType()->isRecordType())))) { return CreateOverloadedArraySubscriptExpr(lbLoc, rbLoc, base, ArgExprs); } ExprResult Res = CreateBuiltinArraySubscriptExpr(base, lbLoc, ArgExprs.front(), rbLoc); if (!Res.isInvalid() && isa(Res.get())) CheckSubscriptAccessOfNoDeref(cast(Res.get())); return Res; } ExprResult Sema::tryConvertExprToType(Expr *E, QualType Ty) { InitializedEntity Entity = InitializedEntity::InitializeTemporary(Ty); InitializationKind Kind = InitializationKind::CreateCopy(E->getBeginLoc(), SourceLocation()); InitializationSequence InitSeq(*this, Entity, Kind, E); return InitSeq.Perform(*this, Entity, Kind, E); } ExprResult Sema::CreateBuiltinMatrixSubscriptExpr(Expr *Base, Expr *RowIdx, Expr *ColumnIdx, SourceLocation RBLoc) { ExprResult BaseR = CheckPlaceholderExpr(Base); if (BaseR.isInvalid()) return BaseR; Base = BaseR.get(); ExprResult RowR = CheckPlaceholderExpr(RowIdx); if (RowR.isInvalid()) return RowR; RowIdx = RowR.get(); if (!ColumnIdx) return new (Context) MatrixSubscriptExpr( Base, RowIdx, ColumnIdx, Context.IncompleteMatrixIdxTy, RBLoc); // Build an unanalyzed expression if any of the operands is type-dependent. if (Base->isTypeDependent() || RowIdx->isTypeDependent() || ColumnIdx->isTypeDependent()) return new (Context) MatrixSubscriptExpr(Base, RowIdx, ColumnIdx, Context.DependentTy, RBLoc); ExprResult ColumnR = CheckPlaceholderExpr(ColumnIdx); if (ColumnR.isInvalid()) return ColumnR; ColumnIdx = ColumnR.get(); // Check that IndexExpr is an integer expression. If it is a constant // expression, check that it is less than Dim (= the number of elements in the // corresponding dimension). auto IsIndexValid = [&](Expr *IndexExpr, unsigned Dim, bool IsColumnIdx) -> Expr * { if (!IndexExpr->getType()->isIntegerType() && !IndexExpr->isTypeDependent()) { Diag(IndexExpr->getBeginLoc(), diag::err_matrix_index_not_integer) << IsColumnIdx; return nullptr; } if (Optional Idx = IndexExpr->getIntegerConstantExpr(Context)) { if ((*Idx < 0 || *Idx >= Dim)) { Diag(IndexExpr->getBeginLoc(), diag::err_matrix_index_outside_range) << IsColumnIdx << Dim; return nullptr; } } ExprResult ConvExpr = tryConvertExprToType(IndexExpr, Context.getSizeType()); assert(!ConvExpr.isInvalid() && "should be able to convert any integer type to size type"); return ConvExpr.get(); }; auto *MTy = Base->getType()->getAs(); RowIdx = IsIndexValid(RowIdx, MTy->getNumRows(), false); ColumnIdx = IsIndexValid(ColumnIdx, MTy->getNumColumns(), true); if (!RowIdx || !ColumnIdx) return ExprError(); return new (Context) MatrixSubscriptExpr(Base, RowIdx, ColumnIdx, MTy->getElementType(), RBLoc); } void Sema::CheckAddressOfNoDeref(const Expr *E) { ExpressionEvaluationContextRecord &LastRecord = ExprEvalContexts.back(); const Expr *StrippedExpr = E->IgnoreParenImpCasts(); // For expressions like `&(*s).b`, the base is recorded and what should be // checked. 
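  // Hedged example: with something like
  //   struct S { int b; } __attribute__((noderef)) *s;
  // the dereference in (*s).b is provisionally recorded as a possible bad
  // access, but &(*s).b only forms an address, so the loop below walks past
  // the member accesses and erases that pending diagnostic.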
const MemberExpr *Member = nullptr; while ((Member = dyn_cast(StrippedExpr)) && !Member->isArrow()) StrippedExpr = Member->getBase()->IgnoreParenImpCasts(); LastRecord.PossibleDerefs.erase(StrippedExpr); } void Sema::CheckSubscriptAccessOfNoDeref(const ArraySubscriptExpr *E) { if (isUnevaluatedContext()) return; QualType ResultTy = E->getType(); ExpressionEvaluationContextRecord &LastRecord = ExprEvalContexts.back(); // Bail if the element is an array since it is not memory access. if (isa(ResultTy)) return; if (ResultTy->hasAttr(attr::NoDeref)) { LastRecord.PossibleDerefs.insert(E); return; } // Check if the base type is a pointer to a member access of a struct // marked with noderef. const Expr *Base = E->getBase(); QualType BaseTy = Base->getType(); if (!(isa(BaseTy) || isa(BaseTy))) // Not a pointer access return; const MemberExpr *Member = nullptr; while ((Member = dyn_cast(Base->IgnoreParenCasts())) && Member->isArrow()) Base = Member->getBase(); if (const auto *Ptr = dyn_cast(Base->getType())) { if (Ptr->getPointeeType()->hasAttr(attr::NoDeref)) LastRecord.PossibleDerefs.insert(E); } } ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, Expr *LowerBound, SourceLocation ColonLocFirst, SourceLocation ColonLocSecond, Expr *Length, Expr *Stride, SourceLocation RBLoc) { if (Base->hasPlaceholderType() && !Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { ExprResult Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } if (LowerBound && LowerBound->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(LowerBound); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); LowerBound = Result.get(); } if (Length && Length->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Length); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); Length = Result.get(); } if (Stride && Stride->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Stride); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); Stride = Result.get(); } // Build an unanalyzed expression if either operand is type-dependent. if (Base->isTypeDependent() || (LowerBound && (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || (Length && (Length->isTypeDependent() || Length->isValueDependent())) || (Stride && (Stride->isTypeDependent() || Stride->isValueDependent()))) { return new (Context) OMPArraySectionExpr( Base, LowerBound, Length, Stride, Context.DependentTy, VK_LValue, OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); } // Perform default conversions. 
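  // Hedged reminder of the section syntax being checked here: an OpenMP
  // array section such as a[lb : len] (optionally a[lb : len : stride])
  // requires 'a' to be of pointer or array type; the bound, length and
  // stride checks follow.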
QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); QualType ResultTy; if (OriginalTy->isAnyPointerType()) { ResultTy = OriginalTy->getPointeeType(); } else if (OriginalTy->isArrayType()) { ResultTy = OriginalTy->getAsArrayTypeUnsafe()->getElementType(); } else { return ExprError( Diag(Base->getExprLoc(), diag::err_omp_typecheck_section_value) << Base->getSourceRange()); } // C99 6.5.2.1p1 if (LowerBound) { auto Res = PerformOpenMPImplicitIntegerConversion(LowerBound->getExprLoc(), LowerBound); if (Res.isInvalid()) return ExprError(Diag(LowerBound->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 0 << LowerBound->getSourceRange()); LowerBound = Res.get(); if (LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(LowerBound->getExprLoc(), diag::warn_omp_section_is_char) << 0 << LowerBound->getSourceRange(); } if (Length) { auto Res = PerformOpenMPImplicitIntegerConversion(Length->getExprLoc(), Length); if (Res.isInvalid()) return ExprError(Diag(Length->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 1 << Length->getSourceRange()); Length = Res.get(); if (Length->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || Length->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(Length->getExprLoc(), diag::warn_omp_section_is_char) << 1 << Length->getSourceRange(); } if (Stride) { ExprResult Res = PerformOpenMPImplicitIntegerConversion(Stride->getExprLoc(), Stride); if (Res.isInvalid()) return ExprError(Diag(Stride->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 1 << Stride->getSourceRange()); Stride = Res.get(); if (Stride->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || Stride->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(Stride->getExprLoc(), diag::warn_omp_section_is_char) << 1 << Stride->getSourceRange(); } // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that functions are not objects, and that (in C99 parlance) // incomplete types are not object types. if (ResultTy->isFunctionType()) { Diag(Base->getExprLoc(), diag::err_omp_section_function_type) << ResultTy << Base->getSourceRange(); return ExprError(); } if (RequireCompleteType(Base->getExprLoc(), ResultTy, diag::err_omp_section_incomplete_type, Base)) return ExprError(); if (LowerBound && !OriginalTy->isAnyPointerType()) { Expr::EvalResult Result; if (LowerBound->EvaluateAsInt(Result, Context)) { // OpenMP 5.0, [2.1.5 Array Sections] // The array section must be a subset of the original array. llvm::APSInt LowerBoundValue = Result.Val.getInt(); if (LowerBoundValue.isNegative()) { Diag(LowerBound->getExprLoc(), diag::err_omp_section_not_subset_of_array) << LowerBound->getSourceRange(); return ExprError(); } } } if (Length) { Expr::EvalResult Result; if (Length->EvaluateAsInt(Result, Context)) { // OpenMP 5.0, [2.1.5 Array Sections] // The length must evaluate to non-negative integers. 
llvm::APSInt LengthValue = Result.Val.getInt(); if (LengthValue.isNegative()) { Diag(Length->getExprLoc(), diag::err_omp_section_length_negative) << toString(LengthValue, /*Radix=*/10, /*Signed=*/true) << Length->getSourceRange(); return ExprError(); } } } else if (ColonLocFirst.isValid() && (OriginalTy.isNull() || (!OriginalTy->isConstantArrayType() && !OriginalTy->isVariableArrayType()))) { // OpenMP 5.0, [2.1.5 Array Sections] // When the size of the array dimension is not known, the length must be // specified explicitly. Diag(ColonLocFirst, diag::err_omp_section_length_undefined) << (!OriginalTy.isNull() && OriginalTy->isArrayType()); return ExprError(); } if (Stride) { Expr::EvalResult Result; if (Stride->EvaluateAsInt(Result, Context)) { // OpenMP 5.0, [2.1.5 Array Sections] // The stride must evaluate to a positive integer. llvm::APSInt StrideValue = Result.Val.getInt(); if (!StrideValue.isStrictlyPositive()) { Diag(Stride->getExprLoc(), diag::err_omp_section_stride_non_positive) << toString(StrideValue, /*Radix=*/10, /*Signed=*/true) << Stride->getSourceRange(); return ExprError(); } } } if (!Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { ExprResult Result = DefaultFunctionArrayLvalueConversion(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } return new (Context) OMPArraySectionExpr( Base, LowerBound, Length, Stride, Context.OMPArraySectionTy, VK_LValue, OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); } ExprResult Sema::ActOnOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc, SourceLocation RParenLoc, ArrayRef Dims, ArrayRef Brackets) { if (Base->hasPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } QualType BaseTy = Base->getType(); // Delay analysis of the types/expressions if instantiation/specialization is // required. if (!BaseTy->isPointerType() && Base->isTypeDependent()) return OMPArrayShapingExpr::Create(Context, Context.DependentTy, Base, LParenLoc, RParenLoc, Dims, Brackets); if (!BaseTy->isPointerType() || (!Base->isTypeDependent() && BaseTy->getPointeeType()->isIncompleteType())) return ExprError(Diag(Base->getExprLoc(), diag::err_omp_non_pointer_type_array_shaping_base) << Base->getSourceRange()); SmallVector NewDims; bool ErrorFound = false; for (Expr *Dim : Dims) { if (Dim->hasPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Dim); if (Result.isInvalid()) { ErrorFound = true; continue; } Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) { ErrorFound = true; continue; } Dim = Result.get(); } if (!Dim->isTypeDependent()) { ExprResult Result = PerformOpenMPImplicitIntegerConversion(Dim->getExprLoc(), Dim); if (Result.isInvalid()) { ErrorFound = true; Diag(Dim->getExprLoc(), diag::err_omp_typecheck_shaping_not_integer) << Dim->getSourceRange(); continue; } Dim = Result.get(); Expr::EvalResult EvResult; if (!Dim->isValueDependent() && Dim->EvaluateAsInt(EvResult, Context)) { // OpenMP 5.0, [2.1.4 Array Shaping] // Each si is an integral type expression that must evaluate to a // positive integer. 
llvm::APSInt Value = EvResult.Val.getInt(); if (!Value.isStrictlyPositive()) { Diag(Dim->getExprLoc(), diag::err_omp_shaping_dimension_not_positive) << toString(Value, /*Radix=*/10, /*Signed=*/true) << Dim->getSourceRange(); ErrorFound = true; continue; } } } NewDims.push_back(Dim); } if (ErrorFound) return ExprError(); return OMPArrayShapingExpr::Create(Context, Context.OMPArrayShapingTy, Base, LParenLoc, RParenLoc, NewDims, Brackets); } ExprResult Sema::ActOnOMPIteratorExpr(Scope *S, SourceLocation IteratorKwLoc, SourceLocation LLoc, SourceLocation RLoc, ArrayRef Data) { SmallVector ID; bool IsCorrect = true; for (const OMPIteratorData &D : Data) { TypeSourceInfo *TInfo = nullptr; SourceLocation StartLoc; QualType DeclTy; if (!D.Type.getAsOpaquePtr()) { // OpenMP 5.0, 2.1.6 Iterators // In an iterator-specifier, if the iterator-type is not specified then // the type of that iterator is of int type. DeclTy = Context.IntTy; StartLoc = D.DeclIdentLoc; } else { DeclTy = GetTypeFromParser(D.Type, &TInfo); StartLoc = TInfo->getTypeLoc().getBeginLoc(); } bool IsDeclTyDependent = DeclTy->isDependentType() || DeclTy->containsUnexpandedParameterPack() || DeclTy->isInstantiationDependentType(); if (!IsDeclTyDependent) { if (!DeclTy->isIntegralType(Context) && !DeclTy->isAnyPointerType()) { // OpenMP 5.0, 2.1.6 Iterators, Restrictions, C/C++ // The iterator-type must be an integral or pointer type. Diag(StartLoc, diag::err_omp_iterator_not_integral_or_pointer) << DeclTy; IsCorrect = false; continue; } if (DeclTy.isConstant(Context)) { // OpenMP 5.0, 2.1.6 Iterators, Restrictions, C/C++ // The iterator-type must not be const qualified. Diag(StartLoc, diag::err_omp_iterator_not_integral_or_pointer) << DeclTy; IsCorrect = false; continue; } } // Iterator declaration. assert(D.DeclIdent && "Identifier expected."); // Always try to create iterator declarator to avoid extra error messages // about unknown declarations use. auto *VD = VarDecl::Create(Context, CurContext, StartLoc, D.DeclIdentLoc, D.DeclIdent, DeclTy, TInfo, SC_None); VD->setImplicit(); if (S) { // Check for conflicting previous declaration. 
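      // Hedged example: for depend(iterator(i = 0:n), in: a[i]) the implicit
      // VarDecl created above introduces 'i'; a duplicate name in the same
      // iterator(...) list, e.g. iterator(i = 0:n, i = 0:m), is diagnosed as
      // a redefinition below.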
DeclarationNameInfo NameInfo(VD->getDeclName(), D.DeclIdentLoc); LookupResult Previous(*this, NameInfo, LookupOrdinaryName, ForVisibleRedeclaration); Previous.suppressDiagnostics(); LookupName(Previous, S); FilterLookupForScope(Previous, CurContext, S, /*ConsiderLinkage=*/false, /*AllowInlineNamespace=*/false); if (!Previous.empty()) { NamedDecl *Old = Previous.getRepresentativeDecl(); Diag(D.DeclIdentLoc, diag::err_redefinition) << VD->getDeclName(); Diag(Old->getLocation(), diag::note_previous_definition); } else { PushOnScopeChains(VD, S); } } else { CurContext->addDecl(VD); } Expr *Begin = D.Range.Begin; if (!IsDeclTyDependent && Begin && !Begin->isTypeDependent()) { ExprResult BeginRes = PerformImplicitConversion(Begin, DeclTy, AA_Converting); Begin = BeginRes.get(); } Expr *End = D.Range.End; if (!IsDeclTyDependent && End && !End->isTypeDependent()) { ExprResult EndRes = PerformImplicitConversion(End, DeclTy, AA_Converting); End = EndRes.get(); } Expr *Step = D.Range.Step; if (!IsDeclTyDependent && Step && !Step->isTypeDependent()) { if (!Step->getType()->isIntegralType(Context)) { Diag(Step->getExprLoc(), diag::err_omp_iterator_step_not_integral) << Step << Step->getSourceRange(); IsCorrect = false; continue; } Optional Result = Step->getIntegerConstantExpr(Context); // OpenMP 5.0, 2.1.6 Iterators, Restrictions // If the step expression of a range-specification equals zero, the // behavior is unspecified. if (Result && Result->isZero()) { Diag(Step->getExprLoc(), diag::err_omp_iterator_step_constant_zero) << Step << Step->getSourceRange(); IsCorrect = false; continue; } } if (!Begin || !End || !IsCorrect) { IsCorrect = false; continue; } OMPIteratorExpr::IteratorDefinition &IDElem = ID.emplace_back(); IDElem.IteratorDecl = VD; IDElem.AssignmentLoc = D.AssignLoc; IDElem.Range.Begin = Begin; IDElem.Range.End = End; IDElem.Range.Step = Step; IDElem.ColonLoc = D.ColonLoc; IDElem.SecondColonLoc = D.SecColonLoc; } if (!IsCorrect) { // Invalidate all created iterator declarations if error is found. for (const OMPIteratorExpr::IteratorDefinition &D : ID) { if (Decl *ID = D.IteratorDecl) ID->setInvalidDecl(); } return ExprError(); } SmallVector Helpers; if (!CurContext->isDependentContext()) { // Build number of ityeration for each iteration range. // Ni = ((Stepi > 0) ? 
((Endi + Stepi -1 - Begini)/Stepi) : // ((Begini-Stepi-1-Endi) / -Stepi); for (OMPIteratorExpr::IteratorDefinition &D : ID) { // (Endi - Begini) ExprResult Res = CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, D.Range.End, D.Range.Begin); if(!Res.isUsable()) { IsCorrect = false; continue; } ExprResult St, St1; if (D.Range.Step) { St = D.Range.Step; // (Endi - Begini) + Stepi Res = CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, Res.get(), St.get()); if (!Res.isUsable()) { IsCorrect = false; continue; } // (Endi - Begini) + Stepi - 1 Res = CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, Res.get(), ActOnIntegerConstant(D.AssignmentLoc, 1).get()); if (!Res.isUsable()) { IsCorrect = false; continue; } // ((Endi - Begini) + Stepi - 1) / Stepi Res = CreateBuiltinBinOp(D.AssignmentLoc, BO_Div, Res.get(), St.get()); if (!Res.isUsable()) { IsCorrect = false; continue; } St1 = CreateBuiltinUnaryOp(D.AssignmentLoc, UO_Minus, D.Range.Step); // (Begini - Endi) ExprResult Res1 = CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, D.Range.Begin, D.Range.End); if (!Res1.isUsable()) { IsCorrect = false; continue; } // (Begini - Endi) - Stepi Res1 = CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, Res1.get(), St1.get()); if (!Res1.isUsable()) { IsCorrect = false; continue; } // (Begini - Endi) - Stepi - 1 Res1 = CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, Res1.get(), ActOnIntegerConstant(D.AssignmentLoc, 1).get()); if (!Res1.isUsable()) { IsCorrect = false; continue; } // ((Begini - Endi) - Stepi - 1) / (-Stepi) Res1 = CreateBuiltinBinOp(D.AssignmentLoc, BO_Div, Res1.get(), St1.get()); if (!Res1.isUsable()) { IsCorrect = false; continue; } // Stepi > 0. ExprResult CmpRes = CreateBuiltinBinOp(D.AssignmentLoc, BO_GT, D.Range.Step, ActOnIntegerConstant(D.AssignmentLoc, 0).get()); if (!CmpRes.isUsable()) { IsCorrect = false; continue; } Res = ActOnConditionalOp(D.AssignmentLoc, D.AssignmentLoc, CmpRes.get(), Res.get(), Res1.get()); if (!Res.isUsable()) { IsCorrect = false; continue; } } Res = ActOnFinishFullExpr(Res.get(), /*DiscardedValue=*/false); if (!Res.isUsable()) { IsCorrect = false; continue; } // Build counter update. // Build counter. auto *CounterVD = VarDecl::Create(Context, CurContext, D.IteratorDecl->getBeginLoc(), D.IteratorDecl->getBeginLoc(), nullptr, Res.get()->getType(), nullptr, SC_None); CounterVD->setImplicit(); ExprResult RefRes = BuildDeclRefExpr(CounterVD, CounterVD->getType(), VK_LValue, D.IteratorDecl->getBeginLoc()); // Build counter update. 
// I = Begini + counter * Stepi; ExprResult UpdateRes; if (D.Range.Step) { UpdateRes = CreateBuiltinBinOp( D.AssignmentLoc, BO_Mul, DefaultLvalueConversion(RefRes.get()).get(), St.get()); } else { UpdateRes = DefaultLvalueConversion(RefRes.get()); } if (!UpdateRes.isUsable()) { IsCorrect = false; continue; } UpdateRes = CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, D.Range.Begin, UpdateRes.get()); if (!UpdateRes.isUsable()) { IsCorrect = false; continue; } ExprResult VDRes = BuildDeclRefExpr(cast(D.IteratorDecl), cast(D.IteratorDecl)->getType(), VK_LValue, D.IteratorDecl->getBeginLoc()); UpdateRes = CreateBuiltinBinOp(D.AssignmentLoc, BO_Assign, VDRes.get(), UpdateRes.get()); if (!UpdateRes.isUsable()) { IsCorrect = false; continue; } UpdateRes = ActOnFinishFullExpr(UpdateRes.get(), /*DiscardedValue=*/true); if (!UpdateRes.isUsable()) { IsCorrect = false; continue; } ExprResult CounterUpdateRes = CreateBuiltinUnaryOp(D.AssignmentLoc, UO_PreInc, RefRes.get()); if (!CounterUpdateRes.isUsable()) { IsCorrect = false; continue; } CounterUpdateRes = ActOnFinishFullExpr(CounterUpdateRes.get(), /*DiscardedValue=*/true); if (!CounterUpdateRes.isUsable()) { IsCorrect = false; continue; } OMPIteratorHelperData &HD = Helpers.emplace_back(); HD.CounterVD = CounterVD; HD.Upper = Res.get(); HD.Update = UpdateRes.get(); HD.CounterUpdate = CounterUpdateRes.get(); } } else { Helpers.assign(ID.size(), {}); } if (!IsCorrect) { // Invalidate all created iterator declarations if error is found. for (const OMPIteratorExpr::IteratorDefinition &D : ID) { if (Decl *ID = D.IteratorDecl) ID->setInvalidDecl(); } return ExprError(); } return OMPIteratorExpr::Create(Context, Context.OMPIteratorTy, IteratorKwLoc, LLoc, RLoc, ID, Helpers); } ExprResult Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, Expr *Idx, SourceLocation RLoc) { Expr *LHSExp = Base; Expr *RHSExp = Idx; ExprValueKind VK = VK_LValue; ExprObjectKind OK = OK_Ordinary; // Per C++ core issue 1213, the result is an xvalue if either operand is // a non-lvalue array, and an lvalue otherwise. if (getLangOpts().CPlusPlus11) { for (auto *Op : {LHSExp, RHSExp}) { Op = Op->IgnoreImplicit(); if (Op->getType()->isArrayType() && !Op->isLValue()) VK = VK_XValue; } } // Perform default conversions. if (!LHSExp->getType()->getAs()) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); LHSExp = Result.get(); } ExprResult Result = DefaultFunctionArrayLvalueConversion(RHSExp); if (Result.isInvalid()) return ExprError(); RHSExp = Result.get(); QualType LHSTy = LHSExp->getType(), RHSTy = RHSExp->getType(); // C99 6.5.2.1p2: the expression e1[e2] is by definition precisely equivalent // to the expression *((e1)+(e2)). This means the array "Base" may actually be // in the subscript position. As a result, we need to derive the array base // and index from the expression types. Expr *BaseExpr, *IndexExpr; QualType ResultType; if (LHSTy->isDependentType() || RHSTy->isDependentType()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = getDependentArraySubscriptType(LHSExp, RHSExp, getASTContext()); } else if (const PointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; // Use custom logic if this should be the pseudo-object subscript // expression. 
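    // Hedged example: with ObjC subscripting (no pointer arithmetic on
    // object pointers), dict[key] on an NSMutableDictionary * is rebuilt as
    // a pseudo-object call to objectForKeyedSubscript: /
    // setObject:forKeyedSubscript: rather than treated as an array access.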
if (!LangOpts.isSubscriptPointerArithmetic()) return BuildObjCSubscriptExpression(RLoc, BaseExpr, IndexExpr, nullptr, nullptr); ResultType = PTy->getPointeeType(); } else if (const PointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); if (!LangOpts.isSubscriptPointerArithmetic()) { Diag(LLoc, diag::err_subscript_nonfragile_interface) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } } else if (const VectorType *VTy = LHSTy->getAs()) { BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; // We apply C++ DR1213 to vector subscripting too. if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); if (Materialized.isInvalid()) return ExprError(); LHSExp = Materialized.get(); } VK = LHSExp->getValueKind(); if (VK != VK_PRValue) OK = OK_VectorComponent; ResultType = VTy->getElementType(); QualType BaseType = BaseExpr->getType(); Qualifiers BaseQuals = BaseType.getQualifiers(); Qualifiers MemberQuals = ResultType.getQualifiers(); Qualifiers Combined = BaseQuals + MemberQuals; if (Combined != MemberQuals) ResultType = Context.getQualifiedType(ResultType, Combined); } else if (LHSTy->isBuiltinType() && LHSTy->getAs()->isVLSTBuiltinType()) { const BuiltinType *BTy = LHSTy->getAs(); if (BTy->isSVEBool()) return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) << LHSExp->getSourceRange() << RHSExp->getSourceRange()); BaseExpr = LHSExp; IndexExpr = RHSExp; if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); if (Materialized.isInvalid()) return ExprError(); LHSExp = Materialized.get(); } VK = LHSExp->getValueKind(); if (VK != VK_PRValue) OK = OK_VectorComponent; ResultType = BTy->getSveEltType(Context); QualType BaseType = BaseExpr->getType(); Qualifiers BaseQuals = BaseType.getQualifiers(); Qualifiers MemberQuals = ResultType.getQualifiers(); Qualifiers Combined = BaseQuals + MemberQuals; if (Combined != MemberQuals) ResultType = Context.getQualifiedType(ResultType, Combined); } else if (LHSTy->isArrayType()) { // If we see an array that wasn't promoted by // DefaultFunctionArrayLvalueConversion, it must be an array that // wasn't promoted because of the C90 rule that doesn't // allow promoting non-lvalue arrays. Warn, then // force the promotion here. 
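// Illustrative example (hypothetical declaration, not part of the original
// source): given 'struct S { int a[4]; } f(void);', the C90 expression
// 'f().a[0]' reaches this path because 'f().a' is a non-lvalue array that
// was not decayed by the conversions above.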
Diag(LHSExp->getBeginLoc(), diag::ext_subscript_non_lvalue) << LHSExp->getSourceRange(); LHSExp = ImpCastExprToType(LHSExp, Context.getArrayDecayedType(LHSTy), CK_ArrayToPointerDecay).get(); LHSTy = LHSExp->getType(); BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = LHSTy->castAs()->getPointeeType(); } else if (RHSTy->isArrayType()) { // Same as previous, except for 123[f().a] case Diag(RHSExp->getBeginLoc(), diag::ext_subscript_non_lvalue) << RHSExp->getSourceRange(); RHSExp = ImpCastExprToType(RHSExp, Context.getArrayDecayedType(RHSTy), CK_ArrayToPointerDecay).get(); RHSTy = RHSExp->getType(); BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = RHSTy->castAs()->getPointeeType(); } else { return ExprError(Diag(LLoc, diag::err_typecheck_subscript_value) << LHSExp->getSourceRange() << RHSExp->getSourceRange()); } // C99 6.5.2.1p1 if (!IndexExpr->getType()->isIntegerType() && !IndexExpr->isTypeDependent()) return ExprError(Diag(LLoc, diag::err_typecheck_subscript_not_integer) << IndexExpr->getSourceRange()); if ((IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) && !IndexExpr->isTypeDependent()) Diag(LLoc, diag::warn_subscript_is_char) << IndexExpr->getSourceRange(); // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that Functions are not objects, and that (in C99 parlance) // incomplete types are not object types. if (ResultType->isFunctionType()) { Diag(BaseExpr->getBeginLoc(), diag::err_subscript_function_type) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } if (ResultType->isVoidType() && !getLangOpts().CPlusPlus) { // GNU extension: subscripting on pointer to void Diag(LLoc, diag::ext_gnu_subscript_void_type) << BaseExpr->getSourceRange(); // C forbids expressions of unqualified void type from being l-values. // See IsCForbiddenLValueType. if (!ResultType.hasQualifiers()) VK = VK_PRValue; } else if (!ResultType->isDependentType() && RequireCompleteSizedType( LLoc, ResultType, diag::err_subscript_incomplete_or_sizeless_type, BaseExpr)) return ExprError(); assert(VK == VK_PRValue || LangOpts.CPlusPlus || !ResultType.isCForbiddenLValueType()); if (LHSExp->IgnoreParenImpCasts()->getType()->isVariablyModifiedType() && FunctionScopes.size() > 1) { if (auto *TT = LHSExp->IgnoreParenImpCasts()->getType()->getAs()) { for (auto I = FunctionScopes.rbegin(), E = std::prev(FunctionScopes.rend()); I != E; ++I) { auto *CSI = dyn_cast(*I); if (CSI == nullptr) break; DeclContext *DC = nullptr; if (auto *LSI = dyn_cast(CSI)) DC = LSI->CallOperator; else if (auto *CRSI = dyn_cast(CSI)) DC = CRSI->TheCapturedDecl; else if (auto *BSI = dyn_cast(CSI)) DC = BSI->TheDecl; if (DC) { if (DC->containsDecl(TT->getDecl())) break; captureVariablyModifiedType( Context, LHSExp->IgnoreParenImpCasts()->getType(), CSI); } } } } return new (Context) ArraySubscriptExpr(LHSExp, RHSExp, ResultType, VK, OK, RLoc); } bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { if (Param->hasUnparsedDefaultArg()) { // If we've already cleared out the location for the default argument, // that means we're parsing it right now. 
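// Illustrative example (hypothetical code, not part of the original
// source): 'struct S { static int f(int = f()); };' takes this path, since
// evaluating the call 'f()' needs the very default argument that is still
// being parsed.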
if (!UnparsedDefaultArgLocs.count(Param)) { Diag(Param->getBeginLoc(), diag::err_recursive_default_argument) << FD; Diag(CallLoc, diag::note_recursive_default_argument_used_here); Param->setInvalidDecl(); return true; } Diag(CallLoc, diag::err_use_of_default_argument_to_function_declared_later) << FD << cast(FD->getDeclContext()); Diag(UnparsedDefaultArgLocs[Param], diag::note_default_argument_declared_here); return true; } if (Param->hasUninstantiatedDefaultArg() && InstantiateDefaultArgument(CallLoc, FD, Param)) return true; assert(Param->hasInit() && "default argument but no initializer?"); // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll // be properly destroyed. // FIXME: We should really be rebuilding the default argument with new // bound temporaries; see the comment in PR5810. // We don't need to do that with block decls, though, because // blocks in default argument expression can never capture anything. if (auto Init = dyn_cast(Param->getInit())) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects()); // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. assert(!Init->getNumObjects() && "default argument expression has capturing blocks?"); } // We already type-checked the argument, so we know it works. // Just mark all of the declarations in this potentially-evaluated expression // as being "referenced". EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), /*SkipLocalVariables=*/true); return false; } ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { assert(Param->hasDefaultArg() && "can't build nonexistent default arg"); if (CheckCXXDefaultArgExpr(CallLoc, FD, Param)) return ExprError(); return CXXDefaultArgExpr::Create(Context, CallLoc, Param, CurContext); } Sema::VariadicCallType Sema::getVariadicCallType(FunctionDecl *FDecl, const FunctionProtoType *Proto, Expr *Fn) { if (Proto && Proto->isVariadic()) { if (isa_and_nonnull(FDecl)) return VariadicConstructor; else if (Fn && Fn->getType()->isBlockPointerType()) return VariadicBlock; else if (FDecl) { if (CXXMethodDecl *Method = dyn_cast_or_null(FDecl)) if (Method->isInstance()) return VariadicMethod; } else if (Fn && Fn->getType() == Context.BoundMemberTy) return VariadicMethod; return VariadicFunction; } return VariadicDoesNotApply; } namespace { class FunctionCallCCC final : public FunctionCallFilterCCC { public: FunctionCallCCC(Sema &SemaRef, const IdentifierInfo *FuncName, unsigned NumArgs, MemberExpr *ME) : FunctionCallFilterCCC(SemaRef, NumArgs, false, ME), FunctionName(FuncName) {} bool ValidateCandidate(const TypoCorrection &candidate) override { if (!candidate.getCorrectionSpecifier() || candidate.getCorrectionAsIdentifierInfo() != FunctionName) { return false; } return FunctionCallFilterCCC::ValidateCandidate(candidate); } std::unique_ptr clone() override { return std::make_unique(*this); } private: const IdentifierInfo *const FunctionName; }; } static TypoCorrection TryTypoCorrectionForCall(Sema &S, Expr *Fn, FunctionDecl *FDecl, ArrayRef Args) { MemberExpr *ME = dyn_cast(Fn); DeclarationName FuncName = FDecl->getDeclName(); SourceLocation 
NameLoc = ME ? ME->getMemberLoc() : Fn->getBeginLoc(); FunctionCallCCC CCC(S, FuncName.getAsIdentifierInfo(), Args.size(), ME); if (TypoCorrection Corrected = S.CorrectTypo( DeclarationNameInfo(FuncName, NameLoc), Sema::LookupOrdinaryName, S.getScopeForContext(S.CurContext), nullptr, CCC, Sema::CTK_ErrorRecovery)) { if (NamedDecl *ND = Corrected.getFoundDecl()) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(NameLoc, OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionDecl *FD = dyn_cast(CD)) S.AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(S, NameLoc, Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: break; } } ND = ND->getUnderlyingDecl(); if (isa(ND) || isa(ND)) return Corrected; } } return TypoCorrection(); } /// ConvertArgumentsForCall - Converts the arguments specified in /// Args/NumArgs to the parameter types of the function FDecl with /// function prototype Proto. Call is the call expression itself, and /// Fn is the function expression. For a C++ member function, this /// routine does not attempt to convert the object argument. Returns /// true if the call is ill-formed. bool Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn, FunctionDecl *FDecl, const FunctionProtoType *Proto, ArrayRef Args, SourceLocation RParenLoc, bool IsExecConfig) { // Bail out early if calling a builtin with custom typechecking. if (FDecl) if (unsigned ID = FDecl->getBuiltinID()) if (Context.BuiltinInfo.hasCustomTypechecking(ID)) return false; // C99 6.5.2.2p7 - the arguments are implicitly converted, as if by // assignment, to the types of the corresponding parameter, ... unsigned NumParams = Proto->getNumParams(); bool Invalid = false; unsigned MinArgs = FDecl ? FDecl->getMinRequiredArguments() : NumParams; unsigned FnKind = Fn->getType()->isBlockPointerType() ? 1 /* block */ : (IsExecConfig ? 3 /* kernel function (exec config) */ : 0 /* function */); // If too few arguments are available (and we don't have default // arguments for the remaining parameters), don't make the call. if (Args.size() < NumParams) { if (Args.size() < MinArgs) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_suggest : diag::err_typecheck_call_too_few_args_at_least_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << MinArgs << static_cast(Args.size()) << TC.getCorrectionRange()); } else if (MinArgs == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_one : diag::err_typecheck_call_too_few_args_at_least_one) << FnKind << FDecl->getParamDecl(0) << Fn->getSourceRange(); else Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args : diag::err_typecheck_call_too_few_args_at_least) << FnKind << MinArgs << static_cast(Args.size()) << Fn->getSourceRange(); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getLocation(), diag::note_callee_decl) << FDecl; return true; } // We reserve space for the default arguments when we create // the call expression, before calling ConvertArgumentsForCall. 
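// Illustrative example (hypothetical declaration, not part of the original
// source): for 'void g(int a, int b = 0);', the call 'g(1)' arrives here
// with two argument slots already allocated; the second slot is still null
// and is filled from the default argument below.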
assert((Call->getNumArgs() == NumParams) && "We should have reserved space for the default arguments before!"); } // If too many are passed and not variadic, error on the extras and drop // them. if (Args.size() > NumParams) { if (!Proto->isVariadic()) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_many_args_suggest : diag::err_typecheck_call_too_many_args_at_most_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << NumParams << static_cast(Args.size()) << TC.getCorrectionRange()); } else if (NumParams == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(Args[NumParams]->getBeginLoc(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args_one : diag::err_typecheck_call_too_many_args_at_most_one) << FnKind << FDecl->getParamDecl(0) << static_cast(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getBeginLoc(), Args.back()->getEndLoc()); else Diag(Args[NumParams]->getBeginLoc(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args : diag::err_typecheck_call_too_many_args_at_most) << FnKind << NumParams << static_cast(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getBeginLoc(), Args.back()->getEndLoc()); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getLocation(), diag::note_callee_decl) << FDecl; // This deletes the extra arguments. Call->shrinkNumArgs(NumParams); return true; } } SmallVector AllArgs; VariadicCallType CallType = getVariadicCallType(FDecl, Proto, Fn); Invalid = GatherArgumentsForCall(Call->getBeginLoc(), FDecl, Proto, 0, Args, AllArgs, CallType); if (Invalid) return true; unsigned TotalNumArgs = AllArgs.size(); for (unsigned i = 0; i < TotalNumArgs; ++i) Call->setArg(i, AllArgs[i]); Call->computeDependence(); return false; } bool Sema::GatherArgumentsForCall(SourceLocation CallLoc, FunctionDecl *FDecl, const FunctionProtoType *Proto, unsigned FirstParam, ArrayRef Args, SmallVectorImpl &AllArgs, VariadicCallType CallType, bool AllowExplicit, bool IsListInitialization) { unsigned NumParams = Proto->getNumParams(); bool Invalid = false; size_t ArgIx = 0; // Continue to check argument types (even if we have too few/many args). for (unsigned i = FirstParam; i < NumParams; i++) { QualType ProtoArgType = Proto->getParamType(i); Expr *Arg; ParmVarDecl *Param = FDecl ? FDecl->getParamDecl(i) : nullptr; if (ArgIx < Args.size()) { Arg = Args[ArgIx++]; if (RequireCompleteType(Arg->getBeginLoc(), ProtoArgType, diag::err_call_incomplete_argument, Arg)) return true; // Strip the unbridged-cast placeholder expression off, if applicable. bool CFAudited = false; if (Arg->getType() == Context.ARCUnbridgedCastTy && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) Arg = stripARCUnbridgedCast(Arg); else if (getLangOpts().ObjCAutoRefCount && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) CFAudited = true; if (Proto->getExtParameterInfo(i).isNoEscape() && ProtoArgType->isBlockPointerType()) if (auto *BE = dyn_cast(Arg->IgnoreParenNoopCasts(Context))) BE->getBlockDecl()->setDoesNotEscape(); InitializedEntity Entity = Param ? InitializedEntity::InitializeParameter(Context, Param, ProtoArgType) : InitializedEntity::InitializeParameter( Context, ProtoArgType, Proto->isParamConsumed(i)); // Remember that parameter belongs to a CF audited API. 
if (CFAudited) Entity.setParameterCFAudited(); ExprResult ArgE = PerformCopyInitialization( Entity, SourceLocation(), Arg, IsListInitialization, AllowExplicit); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } else { assert(Param && "can't use default arguments without a known callee"); ExprResult ArgExpr = BuildCXXDefaultArgExpr(CallLoc, FDecl, Param); if (ArgExpr.isInvalid()) return true; Arg = ArgExpr.getAs(); } // Check for array bounds violations for each argument to the call. This // check only triggers warnings when the argument isn't a more complex Expr // with its own checking, such as a BinaryOperator. CheckArrayAccess(Arg); // Check for violations of C99 static array rules (C99 6.7.5.3p7). CheckStaticArrayArgument(CallLoc, Param, Arg); AllArgs.push_back(Arg); } // If this is a variadic call, handle args passed through "...". if (CallType != VariadicDoesNotApply) { // Assume that extern "C" functions with variadic arguments that // return __unknown_anytype aren't *really* variadic. if (Proto->getReturnType() == Context.UnknownAnyTy && FDecl && FDecl->isExternC()) { for (Expr *A : Args.slice(ArgIx)) { QualType paramType; // ignored ExprResult arg = checkUnknownAnyArg(CallLoc, A, paramType); Invalid |= arg.isInvalid(); AllArgs.push_back(arg.get()); } // Otherwise do argument promotion, (C99 6.5.2.2p7). } else { for (Expr *A : Args.slice(ArgIx)) { ExprResult Arg = DefaultVariadicArgumentPromotion(A, CallType, FDecl); Invalid |= Arg.isInvalid(); AllArgs.push_back(Arg.get()); } } // Check for array bounds violations. for (Expr *A : Args.slice(ArgIx)) CheckArrayAccess(A); } return Invalid; } static void DiagnoseCalleeStaticArrayParam(Sema &S, ParmVarDecl *PVD) { TypeLoc TL = PVD->getTypeSourceInfo()->getTypeLoc(); if (DecayedTypeLoc DTL = TL.getAs()) TL = DTL.getOriginalLoc(); if (ArrayTypeLoc ATL = TL.getAs()) S.Diag(PVD->getLocation(), diag::note_callee_static_array) << ATL.getLocalSourceRange(); } /// CheckStaticArrayArgument - If the given argument corresponds to a static /// array parameter, check that it is non-null, and that if it is formed by /// array-to-pointer decay, the underlying array is sufficiently large. /// /// C99 6.7.5.3p7: If the keyword static also appears within the [ and ] of the /// array type derivation, then for each call to the function, the value of the /// corresponding actual argument shall provide access to the first element of /// an array with at least as many elements as specified by the size expression. void Sema::CheckStaticArrayArgument(SourceLocation CallLoc, ParmVarDecl *Param, const Expr *ArgExpr) { // Static array parameters are not supported in C++. 
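// Illustrative example in C (hypothetical declaration, not part of the
// original source): for 'void f(int a[static 10]);', calling 'f(NULL)'
// warns about the null argument, and passing an 'int[5]' warns that the
// array has fewer than the required 10 elements.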
if (!Param || getLangOpts().CPlusPlus) return; QualType OrigTy = Param->getOriginalType(); const ArrayType *AT = Context.getAsArrayType(OrigTy); if (!AT || AT->getSizeModifier() != ArrayType::Static) return; if (ArgExpr->isNullPointerConstant(Context, Expr::NPC_NeverValueDependent)) { Diag(CallLoc, diag::warn_null_arg) << ArgExpr->getSourceRange(); DiagnoseCalleeStaticArrayParam(*this, Param); return; } const ConstantArrayType *CAT = dyn_cast(AT); if (!CAT) return; const ConstantArrayType *ArgCAT = Context.getAsConstantArrayType(ArgExpr->IgnoreParenCasts()->getType()); if (!ArgCAT) return; if (getASTContext().hasSameUnqualifiedType(CAT->getElementType(), ArgCAT->getElementType())) { if (ArgCAT->getSize().ult(CAT->getSize())) { Diag(CallLoc, diag::warn_static_array_too_small) << ArgExpr->getSourceRange() << (unsigned)ArgCAT->getSize().getZExtValue() << (unsigned)CAT->getSize().getZExtValue() << 0; DiagnoseCalleeStaticArrayParam(*this, Param); } return; } Optional ArgSize = getASTContext().getTypeSizeInCharsIfKnown(ArgCAT); Optional ParmSize = getASTContext().getTypeSizeInCharsIfKnown(CAT); if (ArgSize && ParmSize && *ArgSize < *ParmSize) { Diag(CallLoc, diag::warn_static_array_too_small) << ArgExpr->getSourceRange() << (unsigned)ArgSize->getQuantity() << (unsigned)ParmSize->getQuantity() << 1; DiagnoseCalleeStaticArrayParam(*this, Param); } } /// Given a function expression of unknown-any type, try to rebuild it /// to have a function type. static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *fn); /// Is the given type a placeholder that we need to lower out /// immediately during argument processing? static bool isPlaceholderToRemoveAsArg(QualType type) { // Placeholders are never sugared. const BuiltinType *placeholder = dyn_cast(type); if (!placeholder) return false; switch (placeholder->getKind()) { // Ignore all the non-placeholder types. #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ case BuiltinType::Id: #include "clang/Basic/OpenCLExtensionTypes.def" // In practice we'll never use this, since all SVE types are sugared // via TypedefTypes rather than exposed directly as BuiltinTypes. #define SVE_TYPE(Name, Id, SingletonId) \ case BuiltinType::Id: #include "clang/Basic/AArch64SVEACLETypes.def" #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: #include "clang/Basic/PPCTypes.def" #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" #define PLACEHOLDER_TYPE(ID, SINGLETON_ID) #define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: #include "clang/AST/BuiltinTypes.def" return false; // We cannot lower out overload sets; they might validly be resolved // by the call machinery. case BuiltinType::Overload: return false; // Unbridged casts in ARC can be handled in some call positions and // should be left in place. case BuiltinType::ARCUnbridgedCast: return false; // Pseudo-objects should be converted as soon as possible. case BuiltinType::PseudoObject: return true; // The debugger mode could theoretically but currently does not try // to resolve unknown-typed arguments based on known parameter types. case BuiltinType::UnknownAny: return true; // These are always invalid as call arguments and should be reported. 
case BuiltinType::BoundMember: case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: case BuiltinType::OMPArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: return true; } llvm_unreachable("bad builtin type kind"); } /// Check an argument list for placeholders that we won't try to /// handle later. static bool checkArgsForPlaceholders(Sema &S, MultiExprArg args) { // Apply this processing to all the arguments at once instead of // dying at the first failure. bool hasInvalid = false; for (size_t i = 0, e = args.size(); i != e; i++) { if (isPlaceholderToRemoveAsArg(args[i]->getType())) { ExprResult result = S.CheckPlaceholderExpr(args[i]); if (result.isInvalid()) hasInvalid = true; else args[i] = result.get(); } } return hasInvalid; } /// If a builtin function has a pointer argument with no explicit address /// space, then it should be able to accept a pointer to any address /// space as input. In order to do this, we need to replace the /// standard builtin declaration with one that uses the same address space /// as the call. /// /// \returns nullptr If this builtin is not a candidate for a rewrite i.e. /// it does not contain any pointer arguments without /// an address space qualifer. Otherwise the rewritten /// FunctionDecl is returned. /// TODO: Handle pointer return types. static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, FunctionDecl *FDecl, MultiExprArg ArgExprs) { QualType DeclType = FDecl->getType(); const FunctionProtoType *FT = dyn_cast(DeclType); if (!Context.BuiltinInfo.hasPtrArgsOrResult(FDecl->getBuiltinID()) || !FT || ArgExprs.size() < FT->getNumParams()) return nullptr; bool NeedsNewDecl = false; unsigned i = 0; SmallVector OverloadParams; for (QualType ParamType : FT->param_types()) { // Convert array arguments to pointer to simplify type lookup. 
ExprResult ArgRes = Sema->DefaultFunctionArrayLvalueConversion(ArgExprs[i++]); if (ArgRes.isInvalid()) return nullptr; Expr *Arg = ArgRes.get(); QualType ArgType = Arg->getType(); if (!ParamType->isPointerType() || ParamType.hasAddressSpace() || !ArgType->isPointerType() || !ArgType->getPointeeType().hasAddressSpace()) { OverloadParams.push_back(ParamType); continue; } QualType PointeeType = ParamType->getPointeeType(); if (PointeeType.hasAddressSpace()) continue; NeedsNewDecl = true; LangAS AS = ArgType->getPointeeType().getAddressSpace(); PointeeType = Context.getAddrSpaceQualType(PointeeType, AS); OverloadParams.push_back(Context.getPointerType(PointeeType)); } if (!NeedsNewDecl) return nullptr; FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = FT->isVariadic(); QualType OverloadTy = Context.getFunctionType(FT->getReturnType(), OverloadParams, EPI); DeclContext *Parent = FDecl->getParent(); FunctionDecl *OverloadDecl = FunctionDecl::Create( Context, Parent, FDecl->getLocation(), FDecl->getLocation(), FDecl->getIdentifier(), OverloadTy, /*TInfo=*/nullptr, SC_Extern, Sema->getCurFPFeatures().isFPConstrained(), false, /*hasPrototype=*/true); SmallVector Params; FT = cast(OverloadTy); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { QualType ParamType = FT->getParamType(i); ParmVarDecl *Parm = ParmVarDecl::Create(Context, OverloadDecl, SourceLocation(), SourceLocation(), nullptr, ParamType, /*TInfo=*/nullptr, SC_None, nullptr); Parm->setScopeInfo(0, i); Params.push_back(Parm); } OverloadDecl->setParams(Params); Sema->mergeDeclAttributes(OverloadDecl, FDecl); return OverloadDecl; } static void checkDirectCallValidity(Sema &S, const Expr *Fn, FunctionDecl *Callee, MultiExprArg ArgExprs) { // `Callee` (when called with ArgExprs) may be ill-formed. enable_if (and // similar attributes) really don't like it when functions are called with an // invalid number of args. if (S.TooManyArguments(Callee->getNumParams(), ArgExprs.size(), /*PartialOverloading=*/false) && !Callee->isVariadic()) return; if (Callee->getMinRequiredArguments() > ArgExprs.size()) return; if (const EnableIfAttr *Attr = S.CheckEnableIf(Callee, Fn->getBeginLoc(), ArgExprs, true)) { S.Diag(Fn->getBeginLoc(), isa(Callee) ? diag::err_ovl_no_viable_member_function_in_call : diag::err_ovl_no_viable_function_in_call) << Callee << Callee->getSourceRange(); S.Diag(Callee->getLocation(), diag::note_ovl_candidate_disabled_by_function_cond_attr) << Attr->getCond()->getSourceRange() << Attr->getMessage(); return; } } static bool enclosingClassIsRelatedToClassInWhichMembersWereFound( const UnresolvedMemberExpr *const UME, Sema &S) { const auto GetFunctionLevelDCIfCXXClass = [](Sema &S) -> const CXXRecordDecl * { const DeclContext *const DC = S.getFunctionLevelDeclContext(); if (!DC || !DC->getParent()) return nullptr; // If the call to some member function was made from within a member // function body 'M' return return 'M's parent. if (const auto *MD = dyn_cast(DC)) return MD->getParent()->getCanonicalDecl(); // else the call was made from within a default member initializer of a // class, so return the class. if (const auto *RD = dyn_cast(DC)) return RD->getCanonicalDecl(); return nullptr; }; // If our DeclContext is neither a member function nor a class (in the // case of a lambda in a default member initializer), we can't have an // enclosing 'this'. 
const CXXRecordDecl *const CurParentClass = GetFunctionLevelDCIfCXXClass(S); if (!CurParentClass) return false; // The naming class for implicit member functions call is the class in which // name lookup starts. const CXXRecordDecl *const NamingClass = UME->getNamingClass()->getCanonicalDecl(); assert(NamingClass && "Must have naming class even for implicit access"); // If the unresolved member functions were found in a 'naming class' that is // related (either the same or derived from) to the class that contains the // member function that itself contained the implicit member access. return CurParentClass == NamingClass || CurParentClass->isDerivedFrom(NamingClass); } static void tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs( Sema &S, const UnresolvedMemberExpr *const UME, SourceLocation CallLoc) { if (!UME) return; LambdaScopeInfo *const CurLSI = S.getCurLambda(); // Only try and implicitly capture 'this' within a C++ Lambda if it hasn't // already been captured, or if this is an implicit member function call (if // it isn't, an attempt to capture 'this' should already have been made). if (!CurLSI || CurLSI->ImpCaptureStyle == CurLSI->ImpCap_None || !UME->isImplicitAccess() || CurLSI->isCXXThisCaptured()) return; // Check if the naming class in which the unresolved members were found is // related (same as or is a base of) to the enclosing class. if (!enclosingClassIsRelatedToClassInWhichMembersWereFound(UME, S)) return; DeclContext *EnclosingFunctionCtx = S.CurContext->getParent()->getParent(); // If the enclosing function is not dependent, then this lambda is // capture ready, so if we can capture this, do so. if (!EnclosingFunctionCtx->isDependentContext()) { // If the current lambda and all enclosing lambdas can capture 'this' - // then go ahead and capture 'this' (since our unresolved overload set // contains at least one non-static member function). if (!S.CheckCXXThisCapture(CallLoc, /*Explcit*/ false, /*Diagnose*/ false)) S.CheckCXXThisCapture(CallLoc); } else if (S.CurContext->isDependentContext()) { // ... since this is an implicit member reference, that might potentially // involve a 'this' capture, mark 'this' for potential capture in // enclosing lambdas. if (CurLSI->ImpCaptureStyle != CurLSI->ImpCap_None) CurLSI->addPotentialThisCapture(CallLoc); } } // Once a call is fully resolved, warn for unqualified calls to specific // C++ standard functions, like move and forward. static void DiagnosedUnqualifiedCallsToStdFunctions(Sema &S, CallExpr *Call) { // We are only checking unary move and forward so exit early here. if (Call->getNumArgs() != 1) return; Expr *E = Call->getCallee()->IgnoreParenImpCasts(); if (!E || isa(E)) return; DeclRefExpr *DRE = dyn_cast_or_null(E); if (!DRE || !DRE->getLocation().isValid()) return; if (DRE->getQualifier()) return; const FunctionDecl *FD = Call->getDirectCallee(); if (!FD) return; // Only warn for some functions deemed more frequent or problematic. 
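// Illustrative example (not part of the original source): a call spelled
// 'move(x)' that resolves to 'std::move' via ADL is diagnosed here with a
// fix-it inserting 'std::'; a qualified 'std::move(x)' is not.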
unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID != Builtin::BImove && BuiltinID != Builtin::BIforward) return; S.Diag(DRE->getLocation(), diag::warn_unqualified_call_to_std_cast_function) << FD->getQualifiedNameAsString() << FixItHint::CreateInsertion(DRE->getLocation(), "std::"); } ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, MultiExprArg ArgExprs, SourceLocation RParenLoc, Expr *ExecConfig) { ExprResult Call = BuildCallExpr(Scope, Fn, LParenLoc, ArgExprs, RParenLoc, ExecConfig, /*IsExecConfig=*/false, /*AllowRecovery=*/true); if (Call.isInvalid()) return Call; // Diagnose uses of the C++20 "ADL-only template-id call" feature in earlier // language modes. if (auto *ULE = dyn_cast(Fn)) { if (ULE->hasExplicitTemplateArgs() && ULE->decls_begin() == ULE->decls_end()) { Diag(Fn->getExprLoc(), getLangOpts().CPlusPlus20 ? diag::warn_cxx17_compat_adl_only_template_id : diag::ext_adl_only_template_id) << ULE->getName(); } } if (LangOpts.OpenMP) Call = ActOnOpenMPCall(Call, Scope, LParenLoc, ArgExprs, RParenLoc, ExecConfig); if (LangOpts.CPlusPlus) { CallExpr *CE = dyn_cast(Call.get()); if (CE) DiagnosedUnqualifiedCallsToStdFunctions(*this, CE); } return Call; } /// BuildCallExpr - Handle a call to Fn with the specified array of arguments. /// This provides the location of the left/right parens and a list of comma /// locations. ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, MultiExprArg ArgExprs, SourceLocation RParenLoc, Expr *ExecConfig, bool IsExecConfig, bool AllowRecovery) { // Since this might be a postfix expression, get rid of ParenListExprs. ExprResult Result = MaybeConvertParenListExprToParenExpr(Scope, Fn); if (Result.isInvalid()) return ExprError(); Fn = Result.get(); if (checkArgsForPlaceholders(*this, ArgExprs)) return ExprError(); if (getLangOpts().CPlusPlus) { // If this is a pseudo-destructor expression, build the call immediately. if (isa(Fn)) { if (!ArgExprs.empty()) { // Pseudo-destructor calls should not have any arguments. Diag(Fn->getBeginLoc(), diag::err_pseudo_dtor_call_with_args) << FixItHint::CreateRemoval( SourceRange(ArgExprs.front()->getBeginLoc(), ArgExprs.back()->getEndLoc())); } return CallExpr::Create(Context, Fn, /*Args=*/{}, Context.VoidTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides()); } if (Fn->getType() == Context.PseudoObjectTy) { ExprResult result = CheckPlaceholderExpr(Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } // Determine whether this is a dependent call inside a C++ template, // in which case we won't do any semantic analysis now. if (Fn->isTypeDependent() || Expr::hasAnyTypeDependentArguments(ArgExprs)) { if (ExecConfig) { return CUDAKernelCallExpr::Create(Context, Fn, cast(ExecConfig), ArgExprs, Context.DependentTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides()); } else { tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs( *this, dyn_cast(Fn->IgnoreParens()), Fn->getBeginLoc()); return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides()); } } // Determine whether this is a call to an object (C++ [over.call.object]). 
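// Illustrative example (hypothetical type, not part of the original
// source): given 'struct Adder { int operator()(int, int); } add;', the
// expression 'add(1, 2)' is such a call and is handled by
// BuildCallToObjectOfClassType below.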
if (Fn->getType()->isRecordType()) return BuildCallToObjectOfClassType(Scope, Fn, LParenLoc, ArgExprs, RParenLoc); if (Fn->getType() == Context.UnknownAnyTy) { ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } if (Fn->getType() == Context.BoundMemberTy) { return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, RParenLoc, ExecConfig, IsExecConfig, AllowRecovery); } } // Check for overloaded calls. This can happen even in C due to extensions. if (Fn->getType() == Context.OverloadTy) { OverloadExpr::FindResult find = OverloadExpr::find(Fn); // We aren't supposed to apply this logic if there's an '&' involved. if (!find.HasFormOfMemberPointer) { if (Expr::hasAnyTypeDependentArguments(ArgExprs)) return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides()); OverloadExpr *ovl = find.Expression; if (UnresolvedLookupExpr *ULE = dyn_cast(ovl)) return BuildOverloadedCallExpr( Scope, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig, /*AllowTypoCorrection=*/true, find.IsAddressOfOperand); return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, RParenLoc, ExecConfig, IsExecConfig, AllowRecovery); } } // If we're directly calling a function, get the appropriate declaration. if (Fn->getType() == Context.UnknownAnyTy) { ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } Expr *NakedFn = Fn->IgnoreParens(); bool CallingNDeclIndirectly = false; NamedDecl *NDecl = nullptr; if (UnaryOperator *UnOp = dyn_cast(NakedFn)) { if (UnOp->getOpcode() == UO_AddrOf) { CallingNDeclIndirectly = true; NakedFn = UnOp->getSubExpr()->IgnoreParens(); } } if (auto *DRE = dyn_cast(NakedFn)) { NDecl = DRE->getDecl(); FunctionDecl *FDecl = dyn_cast(NDecl); if (FDecl && FDecl->getBuiltinID()) { // Rewrite the function decl for this builtin by replacing parameters // with no explicit address space with the address space of the arguments // in ArgExprs. if ((FDecl = rewriteBuiltinFunctionDecl(this, Context, FDecl, ArgExprs))) { NDecl = FDecl; Fn = DeclRefExpr::Create( Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false, SourceLocation(), FDecl->getType(), Fn->getValueKind(), FDecl, nullptr, DRE->isNonOdrUse()); } } } else if (isa(NakedFn)) NDecl = cast(NakedFn)->getMemberDecl(); if (FunctionDecl *FD = dyn_cast_or_null(NDecl)) { if (CallingNDeclIndirectly && !checkAddressOfFunctionIsAvailable( FD, /*Complain=*/true, Fn->getBeginLoc())) return ExprError(); checkDirectCallValidity(*this, Fn, FD, ArgExprs); // If this expression is a call to a builtin function in HIP device // compilation, allow a pointer-type argument to default address space to be // passed as a pointer-type parameter to a non-default address space. // If Arg is declared in the default address space and Param is declared // in a non-default address space, perform an implicit address space cast to // the parameter type. 
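// Illustrative example (hypothetical scenario, not part of the original
// source): in HIP device code, an 'int *' argument in the default (generic)
// address space passed to a builtin parameter declared with a specific
// target address space (e.g. AMDGPU LDS) is implicitly cast here.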
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice && FD && FD->getBuiltinID()) { for (unsigned Idx = 0; Idx < FD->param_size(); ++Idx) { ParmVarDecl *Param = FD->getParamDecl(Idx); if (!ArgExprs[Idx] || !Param || !Param->getType()->isPointerType() || !ArgExprs[Idx]->getType()->isPointerType()) continue; auto ParamAS = Param->getType()->getPointeeType().getAddressSpace(); auto ArgTy = ArgExprs[Idx]->getType(); auto ArgPtTy = ArgTy->getPointeeType(); auto ArgAS = ArgPtTy.getAddressSpace(); // Add address space cast if target address spaces are different bool NeedImplicitASC = ParamAS != LangAS::Default && // Pointer params in generic AS don't need special handling. ( ArgAS == LangAS::Default || // We do allow implicit conversion from generic AS // or from specific AS which has target AS matching that of Param. getASTContext().getTargetAddressSpace(ArgAS) == getASTContext().getTargetAddressSpace(ParamAS)); if (!NeedImplicitASC) continue; // First, ensure that the Arg is an RValue. if (ArgExprs[Idx]->isGLValue()) { ArgExprs[Idx] = ImplicitCastExpr::Create( Context, ArgExprs[Idx]->getType(), CK_NoOp, ArgExprs[Idx], nullptr, VK_PRValue, FPOptionsOverride()); } // Construct a new arg type with address space of Param Qualifiers ArgPtQuals = ArgPtTy.getQualifiers(); ArgPtQuals.setAddressSpace(ParamAS); auto NewArgPtTy = Context.getQualifiedType(ArgPtTy.getUnqualifiedType(), ArgPtQuals); auto NewArgTy = Context.getQualifiedType(Context.getPointerType(NewArgPtTy), ArgTy.getQualifiers()); // Finally perform an implicit address space cast ArgExprs[Idx] = ImpCastExprToType(ArgExprs[Idx], NewArgTy, CK_AddressSpaceConversion) .get(); } } } if (Context.isDependenceAllowed() && (Fn->isTypeDependent() || Expr::hasAnyTypeDependentArguments(ArgExprs))) { assert(!getLangOpts().CPlusPlus); assert((Fn->containsErrors() || llvm::any_of(ArgExprs, [](clang::Expr *E) { return E->containsErrors(); })) && "should only occur in error-recovery path."); QualType ReturnType = llvm::isa_and_nonnull(NDecl) ? cast(NDecl)->getCallResultType() : Context.DependentTy; return CallExpr::Create(Context, Fn, ArgExprs, ReturnType, Expr::getValueKindForType(ReturnType), RParenLoc, CurFPFeatureOverrides()); } return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc, ExecConfig, IsExecConfig); } /// BuildBuiltinCallExpr - Create a call to a builtin function specified by Id // with the specified CallArgs Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id, MultiExprArg CallArgs) { StringRef Name = Context.BuiltinInfo.getName(Id); LookupResult R(*this, &Context.Idents.get(Name), Loc, Sema::LookupOrdinaryName); LookupName(R, TUScope, /*AllowBuiltinCreation=*/true); auto *BuiltInDecl = R.getAsSingle(); assert(BuiltInDecl && "failed to find builtin declaration"); ExprResult DeclRef = BuildDeclRefExpr(BuiltInDecl, BuiltInDecl->getType(), VK_LValue, Loc); assert(DeclRef.isUsable() && "Builtin reference cannot fail"); ExprResult Call = BuildCallExpr(/*Scope=*/nullptr, DeclRef.get(), Loc, CallArgs, Loc); assert(!Call.isInvalid() && "Call to builtin cannot fail!"); return Call.get(); } /// Parse a __builtin_astype expression. /// /// __builtin_astype( value, dst type ) /// ExprResult Sema::ActOnAsTypeExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { QualType DstTy = GetTypeFromParser(ParsedDestTy); return BuildAsTypeExpr(E, DstTy, BuiltinLoc, RParenLoc); } /// Create a new AsTypeExpr node (bitcast) from the arguments. 
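///
/// For illustration (hypothetical operands, not part of the original
/// source): '__builtin_astype(f4, int4)' reinterprets the bits of a
/// 'float4' value as an 'int4'; the size check below rejects mismatched
/// source and destination sizes.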
ExprResult Sema::BuildAsTypeExpr(Expr *E, QualType DestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { ExprValueKind VK = VK_PRValue; ExprObjectKind OK = OK_Ordinary; QualType SrcTy = E->getType(); if (!SrcTy->isDependentType() && Context.getTypeSize(DestTy) != Context.getTypeSize(SrcTy)) return ExprError( Diag(BuiltinLoc, diag::err_invalid_astype_of_different_size) << DestTy << SrcTy << E->getSourceRange()); return new (Context) AsTypeExpr(E, DestTy, VK, OK, BuiltinLoc, RParenLoc); } /// ActOnConvertVectorExpr - create a new convert-vector expression from the /// provided arguments. /// /// __builtin_convertvector( value, dst type ) /// ExprResult Sema::ActOnConvertVectorExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(ParsedDestTy, &TInfo); return SemaConvertVectorExpr(E, TInfo, BuiltinLoc, RParenLoc); } /// BuildResolvedCallExpr - Build a call to a resolved expression, /// i.e. an expression not of \p OverloadTy. The expression should /// unary-convert to an expression of function-pointer or /// block-pointer type. /// /// \param NDecl the declaration being called, if available ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, SourceLocation LParenLoc, ArrayRef Args, SourceLocation RParenLoc, Expr *Config, bool IsExecConfig, ADLCallKind UsesADL) { FunctionDecl *FDecl = dyn_cast_or_null(NDecl); unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0); // Functions with 'interrupt' attribute cannot be called directly. if (FDecl && FDecl->hasAttr()) { Diag(Fn->getExprLoc(), diag::err_anyx86_interrupt_called); return ExprError(); } // Interrupt handlers don't save off the VFP regs automatically on ARM, // so there's some risk when calling out to non-interrupt handler functions // that the callee might not preserve them. This is easy to diagnose here, // but can be very challenging to debug. // Likewise, X86 interrupt handlers may only call routines with attribute // no_caller_saved_registers since there is no efficient way to // save and restore the non-GPR state. if (auto *Caller = getCurFunctionDecl()) { if (Caller->hasAttr()) { bool VFP = Context.getTargetInfo().hasFeature("vfp"); if (VFP && (!FDecl || !FDecl->hasAttr())) { Diag(Fn->getExprLoc(), diag::warn_arm_interrupt_calling_convention); if (FDecl) Diag(FDecl->getLocation(), diag::note_callee_decl) << FDecl; } } if (Caller->hasAttr() && ((!FDecl || !FDecl->hasAttr()))) { Diag(Fn->getExprLoc(), diag::warn_anyx86_interrupt_regsave); if (FDecl) Diag(FDecl->getLocation(), diag::note_callee_decl) << FDecl; } } // Promote the function operand. // We special-case function promotion here because we only allow promoting // builtin functions to function pointers in the callee of a call. ExprResult Result; QualType ResultTy; if (BuiltinID && Fn->getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)) { // Extract the return type from the (builtin) function pointer type. // FIXME Several builtins still have setType in // Sema::CheckBuiltinFunctionCall. One should review their definitions in // Builtins.def to ensure they are correct before removing setType calls. 
QualType FnPtrTy = Context.getPointerType(FDecl->getType()); Result = ImpCastExprToType(Fn, FnPtrTy, CK_BuiltinFnToFnPtr).get(); ResultTy = FDecl->getCallResultType(); } else { Result = CallExprUnaryConversions(Fn); ResultTy = Context.BoolTy; } if (Result.isInvalid()) return ExprError(); Fn = Result.get(); // Check for a valid function type, but only if it is not a builtin which // requires custom type checking. These will be handled by // CheckBuiltinFunctionCall below just after creation of the call expression. const FunctionType *FuncT = nullptr; if (!BuiltinID || !Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) { retry: if (const PointerType *PT = Fn->getType()->getAs()) { // C99 6.5.2.2p1 - "The expression that denotes the called function shall // have type pointer to function". FuncT = PT->getPointeeType()->getAs(); if (!FuncT) return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function) << Fn->getType() << Fn->getSourceRange()); } else if (const BlockPointerType *BPT = Fn->getType()->getAs()) { FuncT = BPT->getPointeeType()->castAs(); } else { // Handle calls to expressions of unknown-any type. if (Fn->getType() == Context.UnknownAnyTy) { ExprResult rewrite = rebuildUnknownAnyFunction(*this, Fn); if (rewrite.isInvalid()) return ExprError(); Fn = rewrite.get(); goto retry; } return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function) << Fn->getType() << Fn->getSourceRange()); } } // Get the number of parameters in the function prototype, if any. // We will allocate space for max(Args.size(), NumParams) arguments // in the call expression. const auto *Proto = dyn_cast_or_null(FuncT); unsigned NumParams = Proto ? Proto->getNumParams() : 0; CallExpr *TheCall; if (Config) { assert(UsesADL == ADLCallKind::NotADL && "CUDAKernelCallExpr should not use ADL"); TheCall = CUDAKernelCallExpr::Create(Context, Fn, cast(Config), Args, ResultTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides(), NumParams); } else { TheCall = CallExpr::Create(Context, Fn, Args, ResultTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides(), NumParams, UsesADL); } if (!Context.isDependenceAllowed()) { // Forget about the nulled arguments since typo correction // do not handle them well. TheCall->shrinkNumArgs(Args.size()); // C cannot always handle TypoExpr nodes in builtin calls and direct // function calls as their argument checking don't necessarily handle // dependent types properly, so make sure any TypoExprs have been // dealt with. ExprResult Result = CorrectDelayedTyposInExpr(TheCall); if (!Result.isUsable()) return ExprError(); CallExpr *TheOldCall = TheCall; TheCall = dyn_cast(Result.get()); bool CorrectedTypos = TheCall != TheOldCall; if (!TheCall) return Result; Args = llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()); // A new call expression node was created if some typos were corrected. // However it may not have been constructed with enough storage. In this // case, rebuild the node with enough storage. The waste of space is // immaterial since this only happens when some typos were corrected. if (CorrectedTypos && Args.size() < NumParams) { if (Config) TheCall = CUDAKernelCallExpr::Create( Context, Fn, cast(Config), Args, ResultTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides(), NumParams); else TheCall = CallExpr::Create(Context, Fn, Args, ResultTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides(), NumParams, UsesADL); } // We can now handle the nulled arguments for the default arguments. 
TheCall->setNumArgsUnsafe(std::max(Args.size(), NumParams)); } // Bail out early if calling a builtin with custom type checking. if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); if (getLangOpts().CUDA) { if (Config) { // CUDA: Kernel calls must be to global functions if (FDecl && !FDecl->hasAttr()) return ExprError(Diag(LParenLoc,diag::err_kern_call_not_global_function) << FDecl << Fn->getSourceRange()); // CUDA: Kernel function must have 'void' return type if (!FuncT->getReturnType()->isVoidType() && !FuncT->getReturnType()->getAs() && !FuncT->getReturnType()->isInstantiationDependentType()) return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return) << Fn->getType() << Fn->getSourceRange()); } else { // CUDA: Calls to global functions must be configured if (FDecl && FDecl->hasAttr()) return ExprError(Diag(LParenLoc, diag::err_global_call_not_config) << FDecl << Fn->getSourceRange()); } } // Check for a valid return type if (CheckCallReturnType(FuncT->getReturnType(), Fn->getBeginLoc(), TheCall, FDecl)) return ExprError(); // We know the result type of the call, set it. TheCall->setType(FuncT->getCallResultType(Context)); TheCall->setValueKind(Expr::getValueKindForType(FuncT->getReturnType())); if (Proto) { if (ConvertArgumentsForCall(TheCall, Fn, FDecl, Proto, Args, RParenLoc, IsExecConfig)) return ExprError(); } else { assert(isa(FuncT) && "Unknown FunctionType!"); if (FDecl) { // Check if we have too few/too many template arguments, based // on our knowledge of the function definition. const FunctionDecl *Def = nullptr; if (FDecl->hasBody(Def) && Args.size() != Def->param_size()) { Proto = Def->getType()->getAs(); if (!Proto || !(Proto->isVariadic() && Args.size() >= Def->param_size())) Diag(RParenLoc, diag::warn_call_wrong_number_of_arguments) << (Args.size() > Def->param_size()) << FDecl << Fn->getSourceRange(); } // If the function we're calling isn't a function prototype, but we have // a function prototype from a prior declaratiom, use that prototype. if (!FDecl->hasPrototype()) Proto = FDecl->getType()->getAs(); } // If we still haven't found a prototype to use but there are arguments to // the call, diagnose this as calling a function without a prototype. // However, if we found a function declaration, check to see if // -Wdeprecated-non-prototype was disabled where the function was declared. // If so, we will silence the diagnostic here on the assumption that this // interface is intentional and the user knows what they're doing. We will // also silence the diagnostic if there is a function declaration but it // was implicitly defined (the user already gets diagnostics about the // creation of the implicit function declaration, so the additional warning // is not helpful). if (!Proto && !Args.empty() && (!FDecl || (!FDecl->isImplicit() && !Diags.isIgnored(diag::warn_strict_uses_without_prototype, FDecl->getLocation())))) Diag(LParenLoc, diag::warn_strict_uses_without_prototype) << (FDecl != nullptr) << FDecl; // Promote the arguments (C99 6.5.2.2p6). 
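// Illustrative example (hypothetical call, not part of the original
// source): for a call through 'int (*fp)();', a 'float' argument is
// promoted to 'double' and a 'short' argument to 'int' before being stored
// back into the call below.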
for (unsigned i = 0, e = Args.size(); i != e; i++) { Expr *Arg = Args[i]; if (Proto && i < Proto->getNumParams()) { InitializedEntity Entity = InitializedEntity::InitializeParameter( Context, Proto->getParamType(i), Proto->isParamConsumed(i)); ExprResult ArgE = PerformCopyInitialization(Entity, SourceLocation(), Arg); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } else { ExprResult ArgE = DefaultArgumentPromotion(Arg); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } if (RequireCompleteType(Arg->getBeginLoc(), Arg->getType(), diag::err_call_incomplete_argument, Arg)) return ExprError(); TheCall->setArg(i, Arg); } TheCall->computeDependence(); } if (CXXMethodDecl *Method = dyn_cast_or_null(FDecl)) if (!Method->isStatic()) return ExprError(Diag(LParenLoc, diag::err_member_call_without_object) << Fn->getSourceRange()); // Check for sentinels if (NDecl) DiagnoseSentinelCalls(NDecl, LParenLoc, Args); // Warn for unions passing across security boundary (CMSE). if (FuncT != nullptr && FuncT->getCmseNSCallAttr()) { for (unsigned i = 0, e = Args.size(); i != e; i++) { if (const auto *RT = dyn_cast(Args[i]->getType().getCanonicalType())) { if (RT->getDecl()->isOrContainsUnion()) Diag(Args[i]->getBeginLoc(), diag::warn_cmse_nonsecure_union) << 0 << i; } } } // Do special checking on direct calls to functions. if (FDecl) { if (CheckFunctionCall(FDecl, TheCall, Proto)) return ExprError(); checkFortifiedBuiltinMemoryFunction(FDecl, TheCall); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); } else if (NDecl) { if (CheckPointerCall(NDecl, TheCall, Proto)) return ExprError(); } else { if (CheckOtherCall(TheCall, Proto)) return ExprError(); } return CheckForImmediateInvocation(MaybeBindToTemporary(TheCall), FDecl); } ExprResult Sema::ActOnCompoundLiteral(SourceLocation LParenLoc, ParsedType Ty, SourceLocation RParenLoc, Expr *InitExpr) { assert(Ty && "ActOnCompoundLiteral(): missing type"); assert(InitExpr && "ActOnCompoundLiteral(): missing expression"); TypeSourceInfo *TInfo; QualType literalType = GetTypeFromParser(Ty, &TInfo); if (!TInfo) TInfo = Context.getTrivialTypeSourceInfo(literalType); return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, InitExpr); } ExprResult Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, SourceLocation RParenLoc, Expr *LiteralExpr) { QualType literalType = TInfo->getType(); if (literalType->isArrayType()) { if (RequireCompleteSizedType( LParenLoc, Context.getBaseElementType(literalType), diag::err_array_incomplete_or_sizeless_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); if (literalType->isVariableArrayType()) { if (!tryToFixVariablyModifiedVarType(TInfo, literalType, LParenLoc, diag::err_variable_object_no_init)) { return ExprError(); } } } else if (!literalType->isDependentType() && RequireCompleteType(LParenLoc, literalType, diag::err_typecheck_decl_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); InitializedEntity Entity = InitializedEntity::InitializeCompoundLiteralInit(TInfo); InitializationKind Kind = InitializationKind::CreateCStyleCast(LParenLoc, SourceRange(LParenLoc, RParenLoc), /*InitList=*/true); InitializationSequence InitSeq(*this, Entity, Kind, LiteralExpr); ExprResult Result = InitSeq.Perform(*this, Entity, Kind, LiteralExpr, &literalType); if (Result.isInvalid()) return ExprError(); LiteralExpr = Result.get(); bool isFileScope = !CurContext->isFunctionOrMethod(); // In C, 
compound literals are l-values for some reason. // For GCC compatibility, in C++, file-scope array compound literals with // constant initializers are also l-values, and compound literals are // otherwise prvalues. // // (GCC also treats C++ list-initialized file-scope array prvalues with // constant initializers as l-values, but that's non-conforming, so we don't // follow it there.) // // FIXME: It would be better to handle the lvalue cases as materializing and // lifetime-extending a temporary object, but our materialized temporaries // representation only supports lifetime extension from a variable, not "out // of thin air". // FIXME: For C++, we might want to instead lifetime-extend only if a pointer // is bound to the result of applying array-to-pointer decay to the compound // literal. // FIXME: GCC supports compound literals of reference type, which should // obviously have a value kind derived from the kind of reference involved. ExprValueKind VK = (getLangOpts().CPlusPlus && !(isFileScope && literalType->isArrayType())) ? VK_PRValue : VK_LValue; if (isFileScope) if (auto ILE = dyn_cast(LiteralExpr)) for (unsigned i = 0, j = ILE->getNumInits(); i != j; i++) { Expr *Init = ILE->getInit(i); ILE->setInit(i, ConstantExpr::Create(Context, Init)); } auto *E = new (Context) CompoundLiteralExpr(LParenLoc, TInfo, literalType, VK, LiteralExpr, isFileScope); if (isFileScope) { if (!LiteralExpr->isTypeDependent() && !LiteralExpr->isValueDependent() && !literalType->isDependentType()) // C99 6.5.2.5p3 if (CheckForConstantInitializer(LiteralExpr, literalType)) return ExprError(); } else if (literalType.getAddressSpace() != LangAS::opencl_private && literalType.getAddressSpace() != LangAS::Default) { // Embedded-C extensions to C99 6.5.2.5: // "If the compound literal occurs inside the body of a function, the // type name shall not be qualified by an address-space qualifier." Diag(LParenLoc, diag::err_compound_literal_with_address_space) << SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()); return ExprError(); } if (!isFileScope && !getLangOpts().CPlusPlus) { // Compound literals that have automatic storage duration are destroyed at // the end of the scope in C; in C++, they're just temporaries. // Emit diagnostics if it is or contains a C union type that is non-trivial // to destruct. if (E->getType().hasNonTrivialToPrimitiveDestructCUnion()) checkNonTrivialCUnion(E->getType(), E->getExprLoc(), NTCUC_CompoundLiteral, NTCUK_Destruct); // Diagnose jumps that enter or exit the lifetime of the compound literal. if (literalType.isDestructedType()) { Cleanup.setExprNeedsCleanups(true); ExprCleanupObjects.push_back(E); getCurFunction()->setHasBranchProtectedScope(); } } if (E->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion() || E->getType().hasNonTrivialToPrimitiveCopyCUnion()) checkNonTrivialCUnionInInitializer(E->getInitializer(), E->getInitializer()->getExprLoc()); return MaybeBindToTemporary(E); } ExprResult Sema::ActOnInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList, SourceLocation RBraceLoc) { // Only produce each kind of designated initialization diagnostic once. SourceLocation FirstDesignator; bool DiagnosedArrayDesignator = false; bool DiagnosedNestedDesignator = false; bool DiagnosedMixedDesignator = false; // Check that any designated initializers are syntactically valid in the // current language mode. 
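// Illustrative examples (not part of the original source): '.x = 1' is a
// field designator (valid C++20 designated initialization), '[2] = 3' is an
// array designator, '.a.b = 1' is nested, and '{.x = 1, 2}' mixes
// designated and non-designated initializers; in C++ the latter three are
// diagnosed as extensions below.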
for (unsigned I = 0, E = InitArgList.size(); I != E; ++I) { if (auto *DIE = dyn_cast(InitArgList[I])) { if (FirstDesignator.isInvalid()) FirstDesignator = DIE->getBeginLoc(); if (!getLangOpts().CPlusPlus) break; if (!DiagnosedNestedDesignator && DIE->size() > 1) { DiagnosedNestedDesignator = true; Diag(DIE->getBeginLoc(), diag::ext_designated_init_nested) << DIE->getDesignatorsSourceRange(); } for (auto &Desig : DIE->designators()) { if (!Desig.isFieldDesignator() && !DiagnosedArrayDesignator) { DiagnosedArrayDesignator = true; Diag(Desig.getBeginLoc(), diag::ext_designated_init_array) << Desig.getSourceRange(); } } if (!DiagnosedMixedDesignator && !isa(InitArgList[0])) { DiagnosedMixedDesignator = true; Diag(DIE->getBeginLoc(), diag::ext_designated_init_mixed) << DIE->getSourceRange(); Diag(InitArgList[0]->getBeginLoc(), diag::note_designated_init_mixed) << InitArgList[0]->getSourceRange(); } } else if (getLangOpts().CPlusPlus && !DiagnosedMixedDesignator && isa(InitArgList[0])) { DiagnosedMixedDesignator = true; auto *DIE = cast(InitArgList[0]); Diag(DIE->getBeginLoc(), diag::ext_designated_init_mixed) << DIE->getSourceRange(); Diag(InitArgList[I]->getBeginLoc(), diag::note_designated_init_mixed) << InitArgList[I]->getSourceRange(); } } if (FirstDesignator.isValid()) { // Only diagnose designated initiaization as a C++20 extension if we didn't // already diagnose use of (non-C++20) C99 designator syntax. if (getLangOpts().CPlusPlus && !DiagnosedArrayDesignator && !DiagnosedNestedDesignator && !DiagnosedMixedDesignator) { Diag(FirstDesignator, getLangOpts().CPlusPlus20 ? diag::warn_cxx17_compat_designated_init : diag::ext_cxx_designated_init); } else if (!getLangOpts().CPlusPlus && !getLangOpts().C99) { Diag(FirstDesignator, diag::ext_designated_init); } } return BuildInitList(LBraceLoc, InitArgList, RBraceLoc); } ExprResult Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList, SourceLocation RBraceLoc) { // Semantic analysis for initializers is done by ActOnDeclarator() and // CheckInitializer() - it requires knowledge of the object being initialized. // Immediately handle non-overload placeholders. Overloads can be // resolved contextually, but everything else here can't. for (unsigned I = 0, E = InitArgList.size(); I != E; ++I) { if (InitArgList[I]->getType()->isNonOverloadPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(InitArgList[I]); // Ignore failures; dropping the entire initializer list because // of one failure would be terrible for indexing/etc. if (result.isInvalid()) continue; InitArgList[I] = result.get(); } } InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc); E->setType(Context.VoidTy); // FIXME: just a place holder for now. return E; } /// Do an explicit extend of the given block pointer if we're in ARC. void Sema::maybeExtendBlockObject(ExprResult &E) { assert(E.get()->getType()->isBlockPointerType()); assert(E.get()->isPRValue()); // Only do this in an r-value context. if (!getLangOpts().ObjCAutoRefCount) return; E = ImplicitCastExpr::Create( Context, E.get()->getType(), CK_ARCExtendBlockObject, E.get(), /*base path*/ nullptr, VK_PRValue, FPOptionsOverride()); Cleanup.setExprNeedsCleanups(true); } /// Prepare a conversion of the given expression to an ObjC object /// pointer type. 
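///
/// For example (illustrative), a plain C pointer such as 'void *buf' is
/// converted with a C-pointer-to-ObjC-pointer cast, an existing ObjC object
/// pointer only needs a bitcast, and a block pointer is first extended under
/// ARC and then converted with a block-pointer-to-ObjC-pointer cast.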
CastKind Sema::PrepareCastToObjCObjectPointer(ExprResult &E) { QualType type = E.get()->getType(); if (type->isObjCObjectPointerType()) { return CK_BitCast; } else if (type->isBlockPointerType()) { maybeExtendBlockObject(E); return CK_BlockPointerToObjCPointerCast; } else { assert(type->isPointerType()); return CK_CPointerToObjCPointerCast; } } /// Prepares for a scalar cast, performing all the necessary stages /// except the final cast and returning the kind required. CastKind Sema::PrepareScalarCast(ExprResult &Src, QualType DestTy) { // Both Src and Dest are scalar types, i.e. arithmetic or pointer. // Also, callers should have filtered out the invalid cases with // pointers. Everything else should be possible. QualType SrcTy = Src.get()->getType(); if (Context.hasSameUnqualifiedType(SrcTy, DestTy)) return CK_NoOp; switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) { case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_CPointer: case Type::STK_BlockPointer: case Type::STK_ObjCObjectPointer: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: { LangAS SrcAS = SrcTy->getPointeeType().getAddressSpace(); LangAS DestAS = DestTy->getPointeeType().getAddressSpace(); if (SrcAS != DestAS) return CK_AddressSpaceConversion; if (Context.hasCvrSimilarType(SrcTy, DestTy)) return CK_NoOp; return CK_BitCast; } case Type::STK_BlockPointer: return (SrcKind == Type::STK_BlockPointer ? CK_BitCast : CK_AnyPointerToBlockPointerCast); case Type::STK_ObjCObjectPointer: if (SrcKind == Type::STK_ObjCObjectPointer) return CK_BitCast; if (SrcKind == Type::STK_CPointer) return CK_CPointerToObjCPointerCast; maybeExtendBlockObject(Src); return CK_BlockPointerToObjCPointerCast; case Type::STK_Bool: return CK_PointerToBoolean; case Type::STK_Integral: return CK_PointerToIntegral; case Type::STK_Floating: case Type::STK_FloatingComplex: case Type::STK_IntegralComplex: case Type::STK_MemberPointer: case Type::STK_FixedPoint: llvm_unreachable("illegal cast from pointer"); } llvm_unreachable("Should have returned before this"); case Type::STK_FixedPoint: switch (DestTy->getScalarTypeKind()) { case Type::STK_FixedPoint: return CK_FixedPointCast; case Type::STK_Bool: return CK_FixedPointToBoolean; case Type::STK_Integral: return CK_FixedPointToIntegral; case Type::STK_Floating: return CK_FixedPointToFloating; case Type::STK_IntegralComplex: case Type::STK_FloatingComplex: Diag(Src.get()->getExprLoc(), diag::err_unimplemented_conversion_with_fixed_point_type) << DestTy; return CK_IntegralCast; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: case Type::STK_MemberPointer: llvm_unreachable("illegal cast to pointer type"); } llvm_unreachable("Should have returned before this"); case Type::STK_Bool: // casting from bool is like casting from an integer case Type::STK_Integral: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: if (Src.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) return CK_NullToPointer; return CK_IntegralToPointer; case Type::STK_Bool: return CK_IntegralToBoolean; case Type::STK_Integral: return CK_IntegralCast; case Type::STK_Floating: return CK_IntegralToFloating; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralCast); return CK_IntegralRealToComplex; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), 
DestTy->castAs()->getElementType(), CK_IntegralToFloating); return CK_FloatingRealToComplex; case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_FixedPoint: return CK_IntegralToFixedPoint; } llvm_unreachable("Should have returned before this"); case Type::STK_Floating: switch (DestTy->getScalarTypeKind()) { case Type::STK_Floating: return CK_FloatingCast; case Type::STK_Bool: return CK_FloatingToBoolean; case Type::STK_Integral: return CK_FloatingToIntegral; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingCast); return CK_FloatingRealToComplex; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingToIntegral); return CK_IntegralRealToComplex; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_FixedPoint: return CK_FloatingToFixedPoint; } llvm_unreachable("Should have returned before this"); case Type::STK_FloatingComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_FloatingComplexCast; case Type::STK_IntegralComplex: return CK_FloatingComplexToIntegralComplex; case Type::STK_Floating: { QualType ET = SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_FloatingComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_FloatingComplexToReal); return CK_FloatingCast; } case Type::STK_Bool: return CK_FloatingComplexToBoolean; case Type::STK_Integral: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_FloatingComplexToReal); return CK_FloatingToIntegral; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_FixedPoint: Diag(Src.get()->getExprLoc(), diag::err_unimplemented_conversion_with_fixed_point_type) << SrcTy; return CK_IntegralCast; } llvm_unreachable("Should have returned before this"); case Type::STK_IntegralComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_IntegralComplexToFloatingComplex; case Type::STK_IntegralComplex: return CK_IntegralComplexCast; case Type::STK_Integral: { QualType ET = SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_IntegralComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_IntegralComplexToReal); return CK_IntegralCast; } case Type::STK_Bool: return CK_IntegralComplexToBoolean; case Type::STK_Floating: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_IntegralComplexToReal); return CK_IntegralToFloating; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex int->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_FixedPoint: Diag(Src.get()->getExprLoc(), diag::err_unimplemented_conversion_with_fixed_point_type) << SrcTy; return CK_IntegralCast; } llvm_unreachable("Should have returned before this"); } llvm_unreachable("Unhandled scalar cast"); } static bool breakDownVectorType(QualType type, uint64_t &len, QualType &eltType) { // Vectors are simple. 
if (const VectorType *vecType = type->getAs()) { len = vecType->getNumElements(); eltType = vecType->getElementType(); assert(eltType->isScalarType()); return true; } // We allow lax conversion to and from non-vector types, but only if // they're real types (i.e. non-complex, non-pointer scalar types). if (!type->isRealType()) return false; len = 1; eltType = type; return true; } /// Are the two types SVE-bitcast-compatible types? I.e. is bitcasting from the /// first SVE type (e.g. an SVE VLAT) to the second type (e.g. an SVE VLST) /// allowed? /// /// This will also return false if the two given types do not make sense from /// the perspective of SVE bitcasts. bool Sema::isValidSveBitcast(QualType srcTy, QualType destTy) { assert(srcTy->isVectorType() || destTy->isVectorType()); auto ValidScalableConversion = [](QualType FirstType, QualType SecondType) { if (!FirstType->isSizelessBuiltinType()) return false; const auto *VecTy = SecondType->getAs(); return VecTy && VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector; }; return ValidScalableConversion(srcTy, destTy) || ValidScalableConversion(destTy, srcTy); } /// Are the two types matrix types and do they have the same dimensions i.e. /// do they have the same number of rows and the same number of columns? bool Sema::areMatrixTypesOfTheSameDimension(QualType srcTy, QualType destTy) { if (!destTy->isMatrixType() || !srcTy->isMatrixType()) return false; const ConstantMatrixType *matSrcType = srcTy->getAs(); const ConstantMatrixType *matDestType = destTy->getAs(); return matSrcType->getNumRows() == matDestType->getNumRows() && matSrcType->getNumColumns() == matDestType->getNumColumns(); } bool Sema::areVectorTypesSameSize(QualType SrcTy, QualType DestTy) { assert(DestTy->isVectorType() || SrcTy->isVectorType()); uint64_t SrcLen, DestLen; QualType SrcEltTy, DestEltTy; if (!breakDownVectorType(SrcTy, SrcLen, SrcEltTy)) return false; if (!breakDownVectorType(DestTy, DestLen, DestEltTy)) return false; // ASTContext::getTypeSize will return the size rounded up to a // power of 2, so instead of using that, we need to use the raw // element size multiplied by the element count. uint64_t SrcEltSize = Context.getTypeSize(SrcEltTy); uint64_t DestEltSize = Context.getTypeSize(DestEltTy); return (SrcLen * SrcEltSize == DestLen * DestEltSize); } // This returns true if at least one of the types is an altivec vector. bool Sema::anyAltivecTypes(QualType SrcTy, QualType DestTy) { assert((DestTy->isVectorType() || SrcTy->isVectorType()) && "expected at least one type to be a vector here"); bool IsSrcTyAltivec = SrcTy->isVectorType() && (SrcTy->castAs()->getVectorKind() == VectorType::AltiVecVector); bool IsDestTyAltivec = DestTy->isVectorType() && (DestTy->castAs()->getVectorKind() == VectorType::AltiVecVector); return (IsSrcTyAltivec || IsDestTyAltivec); } // This returns true if both vectors have the same element type. bool Sema::areSameVectorElemTypes(QualType SrcTy, QualType DestTy) { assert((DestTy->isVectorType() || SrcTy->isVectorType()) && "expected at least one type to be a vector here"); uint64_t SrcLen, DestLen; QualType SrcEltTy, DestEltTy; if (!breakDownVectorType(SrcTy, SrcLen, SrcEltTy)) return false; if (!breakDownVectorType(DestTy, DestLen, DestEltTy)) return false; return (SrcEltTy == DestEltTy); } /// Are the two types lax-compatible vector types? That is, given /// that one of them is a vector, do they have equal storage sizes, /// where the storage size is the number of elements times the element /// size? 
/// /// This will also return false if either of the types is neither a /// vector nor a real type. bool Sema::areLaxCompatibleVectorTypes(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); // Disallow lax conversions between scalars and ExtVectors (these // conversions are allowed for other vector types because common headers // depend on them). Most scalar OP ExtVector cases are handled by the // splat path anyway, which does what we want (convert, not bitcast). // What this rules out for ExtVectors is crazy things like char4*float. if (srcTy->isScalarType() && destTy->isExtVectorType()) return false; if (destTy->isScalarType() && srcTy->isExtVectorType()) return false; return areVectorTypesSameSize(srcTy, destTy); } /// Is this a legal conversion between two types, one of which is /// known to be a vector type? bool Sema::isLaxVectorConversion(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); switch (Context.getLangOpts().getLaxVectorConversions()) { case LangOptions::LaxVectorConversionKind::None: return false; case LangOptions::LaxVectorConversionKind::Integer: if (!srcTy->isIntegralOrEnumerationType()) { auto *Vec = srcTy->getAs(); if (!Vec || !Vec->getElementType()->isIntegralOrEnumerationType()) return false; } if (!destTy->isIntegralOrEnumerationType()) { auto *Vec = destTy->getAs(); if (!Vec || !Vec->getElementType()->isIntegralOrEnumerationType()) return false; } // OK, integer (vector) -> integer (vector) bitcast. break; case LangOptions::LaxVectorConversionKind::All: break; } return areLaxCompatibleVectorTypes(srcTy, destTy); } bool Sema::CheckMatrixCast(SourceRange R, QualType DestTy, QualType SrcTy, CastKind &Kind) { if (SrcTy->isMatrixType() && DestTy->isMatrixType()) { if (!areMatrixTypesOfTheSameDimension(SrcTy, DestTy)) { return Diag(R.getBegin(), diag::err_invalid_conversion_between_matrixes) << DestTy << SrcTy << R; } } else if (SrcTy->isMatrixType()) { return Diag(R.getBegin(), diag::err_invalid_conversion_between_matrix_and_type) << SrcTy << DestTy << R; } else if (DestTy->isMatrixType()) { return Diag(R.getBegin(), diag::err_invalid_conversion_between_matrix_and_type) << DestTy << SrcTy << R; } Kind = CK_MatrixCast; return false; } bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, CastKind &Kind) { assert(VectorTy->isVectorType() && "Not a vector type!"); if (Ty->isVectorType() || Ty->isIntegralType(Context)) { if (!areLaxCompatibleVectorTypes(Ty, VectorTy)) return Diag(R.getBegin(), Ty->isVectorType() ? diag::err_invalid_conversion_between_vectors : diag::err_invalid_conversion_between_vector_and_integer) << VectorTy << Ty << R; } else return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << VectorTy << Ty << R; Kind = CK_BitCast; return false; } ExprResult Sema::prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr) { QualType DestElemTy = VectorTy->castAs()->getElementType(); if (DestElemTy == SplattedExpr->getType()) return SplattedExpr; assert(DestElemTy->isFloatingType() || DestElemTy->isIntegralOrEnumerationType()); CastKind CK; if (VectorTy->isExtVectorType() && SplattedExpr->getType()->isBooleanType()) { // OpenCL requires that we convert `true` boolean expressions to -1, but // only when splatting vectors. if (DestElemTy->isFloatingType()) { // To avoid having to have a CK_BooleanToSignedFloating cast kind, we cast // in two steps: boolean to signed integral, then to floating. 
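      // For example (sketch), splatting a boolean expression into a float
      // ext vector conceptually goes
      //
      //   true  ->  (int)-1  ->  -1.0f  ->  splatted to every element
      //
      // instead of requiring a dedicated boolean-to-floating cast kind.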
ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy, CK_BooleanToSignedIntegral); SplattedExpr = CastExprRes.get(); CK = CK_IntegralToFloating; } else { CK = CK_BooleanToSignedIntegral; } } else { ExprResult CastExprRes = SplattedExpr; CK = PrepareScalarCast(CastExprRes, DestElemTy); if (CastExprRes.isInvalid()) return ExprError(); SplattedExpr = CastExprRes.get(); } return ImpCastExprToType(SplattedExpr, DestElemTy, CK); } ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, Expr *CastExpr, CastKind &Kind) { assert(DestTy->isExtVectorType() && "Not an extended vector type!"); QualType SrcTy = CastExpr->getType(); // If SrcTy is a VectorType, the total size must match to explicitly cast to // an ExtVectorType. // In OpenCL, casts between vectors of different types are not allowed. // (See OpenCL 6.2). if (SrcTy->isVectorType()) { if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) || (getLangOpts().OpenCL && !Context.hasSameUnqualifiedType(DestTy, SrcTy))) { Diag(R.getBegin(),diag::err_invalid_conversion_between_ext_vectors) << DestTy << SrcTy << R; return ExprError(); } Kind = CK_BitCast; return CastExpr; } // All non-pointer scalars can be cast to ExtVector type. The appropriate // conversion will take place first from scalar to elt type, and then // splat from elt type to vector. if (SrcTy->isPointerType()) return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << DestTy << SrcTy << R; Kind = CK_VectorSplat; return prepareVectorSplat(DestTy, CastExpr); } ExprResult Sema::ActOnCastExpr(Scope *S, SourceLocation LParenLoc, Declarator &D, ParsedType &Ty, SourceLocation RParenLoc, Expr *CastExpr) { assert(!D.isInvalidType() && (CastExpr != nullptr) && "ActOnCastExpr(): missing type or expr"); TypeSourceInfo *castTInfo = GetTypeForDeclaratorCast(D, CastExpr->getType()); if (D.isInvalidType()) return ExprError(); if (getLangOpts().CPlusPlus) { // Check that there are no default arguments (C++ only). CheckExtraCXXDefaultArguments(D); } else { // Make sure any TypoExprs have been dealt with. ExprResult Res = CorrectDelayedTyposInExpr(CastExpr); if (!Res.isUsable()) return ExprError(); CastExpr = Res.get(); } checkUnusedDeclAttributes(D); QualType castType = castTInfo->getType(); Ty = CreateParsedType(castType, castTInfo); bool isVectorLiteral = false; // Check for an altivec or OpenCL literal, // i.e. all the elements are integer constants. ParenExpr *PE = dyn_cast(CastExpr); ParenListExpr *PLE = dyn_cast(CastExpr); if ((getLangOpts().AltiVec || getLangOpts().ZVector || getLangOpts().OpenCL) && castType->isVectorType() && (PE || PLE)) { if (PLE && PLE->getNumExprs() == 0) { Diag(PLE->getExprLoc(), diag::err_altivec_empty_initializer); return ExprError(); } if (PE || PLE->getNumExprs() == 1) { Expr *E = (PE ? PE->getSubExpr() : PLE->getExpr(0)); if (!E->isTypeDependent() && !E->getType()->isVectorType()) isVectorLiteral = true; } else isVectorLiteral = true; } // If this is a vector initializer, '(' type ')' '(' init, ..., init ')' // then handle it as such. if (isVectorLiteral) return BuildVectorLiteral(LParenLoc, RParenLoc, CastExpr, castTInfo); // If the Expr being casted is a ParenListExpr, handle it specially. // This is not an AltiVec-style cast, so turn the ParenListExpr into a // sequence of BinOp comma operators. 
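  // For instance (illustrative only):
  //
  //   (vector int)(1, 2, 3, 4)   // AltiVec-style vector literal, handled above
  //   (int)(a, b, c)             // ordinary cast; (a, b, c) becomes the comma
  //                              // expression whose value is c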
if (isa(CastExpr)) { ExprResult Result = MaybeConvertParenListExprToParenExpr(S, CastExpr); if (Result.isInvalid()) return ExprError(); CastExpr = Result.get(); } if (getLangOpts().CPlusPlus && !castType->isVoidType()) Diag(LParenLoc, diag::warn_old_style_cast) << CastExpr->getSourceRange(); CheckTollFreeBridgeCast(castType, CastExpr); CheckObjCBridgeRelatedCast(castType, CastExpr); DiscardMisalignedMemberAddress(castType.getTypePtr(), CastExpr); return BuildCStyleCastExpr(LParenLoc, castTInfo, RParenLoc, CastExpr); } ExprResult Sema::BuildVectorLiteral(SourceLocation LParenLoc, SourceLocation RParenLoc, Expr *E, TypeSourceInfo *TInfo) { assert((isa(E) || isa(E)) && "Expected paren or paren list expression"); Expr **exprs; unsigned numExprs; Expr *subExpr; SourceLocation LiteralLParenLoc, LiteralRParenLoc; if (ParenListExpr *PE = dyn_cast(E)) { LiteralLParenLoc = PE->getLParenLoc(); LiteralRParenLoc = PE->getRParenLoc(); exprs = PE->getExprs(); numExprs = PE->getNumExprs(); } else { // isa by assertion at function entrance LiteralLParenLoc = cast(E)->getLParen(); LiteralRParenLoc = cast(E)->getRParen(); subExpr = cast(E)->getSubExpr(); exprs = &subExpr; numExprs = 1; } QualType Ty = TInfo->getType(); assert(Ty->isVectorType() && "Expected vector type"); SmallVector initExprs; const VectorType *VTy = Ty->castAs(); unsigned numElems = VTy->getNumElements(); // '(...)' form of vector initialization in AltiVec: the number of // initializers must be one or must match the size of the vector. // If a single value is specified in the initializer then it will be // replicated to all the components of the vector if (CheckAltivecInitFromScalar(E->getSourceRange(), Ty, VTy->getElementType())) return ExprError(); if (ShouldSplatAltivecScalarInCast(VTy)) { // The number of initializers must be one or must match the size of the // vector. If a single value is specified in the initializer then it will // be replicated to all the components of the vector if (numExprs == 1) { QualType ElemTy = VTy->getElementType(); ExprResult Literal = DefaultLvalueConversion(exprs[0]); if (Literal.isInvalid()) return ExprError(); Literal = ImpCastExprToType(Literal.get(), ElemTy, PrepareScalarCast(Literal, ElemTy)); return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get()); } else if (numExprs < numElems) { Diag(E->getExprLoc(), diag::err_incorrect_number_of_vector_initializers); return ExprError(); } else initExprs.append(exprs, exprs + numExprs); } else { // For OpenCL, when the number of initializers is a single value, // it will be replicated to all components of the vector. if (getLangOpts().OpenCL && VTy->getVectorKind() == VectorType::GenericVector && numExprs == 1) { QualType ElemTy = VTy->getElementType(); ExprResult Literal = DefaultLvalueConversion(exprs[0]); if (Literal.isInvalid()) return ExprError(); Literal = ImpCastExprToType(Literal.get(), ElemTy, PrepareScalarCast(Literal, ElemTy)); return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get()); } initExprs.append(exprs, exprs + numExprs); } // FIXME: This means that pretty-printing the final AST will produce curly // braces instead of the original commas. InitListExpr *initE = new (Context) InitListExpr(Context, LiteralLParenLoc, initExprs, LiteralRParenLoc); initE->setType(Ty); return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, initE); } /// This is not an AltiVec-style cast or or C++ direct-initialization, so turn /// the ParenListExpr into a sequence of comma binary operators. 
ExprResult Sema::MaybeConvertParenListExprToParenExpr(Scope *S, Expr *OrigExpr) { ParenListExpr *E = dyn_cast(OrigExpr); if (!E) return OrigExpr; ExprResult Result(E->getExpr(0)); for (unsigned i = 1, e = E->getNumExprs(); i != e && !Result.isInvalid(); ++i) Result = ActOnBinOp(S, E->getExprLoc(), tok::comma, Result.get(), E->getExpr(i)); if (Result.isInvalid()) return ExprError(); return ActOnParenExpr(E->getLParenLoc(), E->getRParenLoc(), Result.get()); } ExprResult Sema::ActOnParenListExpr(SourceLocation L, SourceLocation R, MultiExprArg Val) { return ParenListExpr::Create(Context, L, Val, R); } /// Emit a specialized diagnostic when one expression is a null pointer /// constant and the other is not a pointer. Returns true if a diagnostic is /// emitted. bool Sema::DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr, SourceLocation QuestionLoc) { Expr *NullExpr = LHSExpr; Expr *NonPointerExpr = RHSExpr; Expr::NullPointerConstantKind NullKind = NullExpr->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull); if (NullKind == Expr::NPCK_NotNull) { NullExpr = RHSExpr; NonPointerExpr = LHSExpr; NullKind = NullExpr->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull); } if (NullKind == Expr::NPCK_NotNull) return false; if (NullKind == Expr::NPCK_ZeroExpression) return false; if (NullKind == Expr::NPCK_ZeroLiteral) { // In this case, check to make sure that we got here from a "NULL" // string in the source code. NullExpr = NullExpr->IgnoreParenImpCasts(); SourceLocation loc = NullExpr->getExprLoc(); if (!findMacroSpelling(loc, "NULL")) return false; } int DiagType = (NullKind == Expr::NPCK_CXX11_nullptr); Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands_null) << NonPointerExpr->getType() << DiagType << NonPointerExpr->getSourceRange(); return true; } /// Return false if the condition expression is valid, true otherwise. static bool checkCondition(Sema &S, Expr *Cond, SourceLocation QuestionLoc) { QualType CondTy = Cond->getType(); // OpenCL v1.1 s6.3.i says the condition cannot be a floating point type. if (S.getLangOpts().OpenCL && CondTy->isFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat) << CondTy << Cond->getSourceRange(); return true; } // C99 6.5.15p2 if (CondTy->isScalarType()) return false; S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_scalar) << CondTy << Cond->getSourceRange(); return true; } /// Handle when one or both operands are void type. static QualType checkConditionalVoidType(Sema &S, ExprResult &LHS, ExprResult &RHS) { Expr *LHSExpr = LHS.get(); Expr *RHSExpr = RHS.get(); if (!LHSExpr->getType()->isVoidType()) S.Diag(RHSExpr->getBeginLoc(), diag::ext_typecheck_cond_one_void) << RHSExpr->getSourceRange(); if (!RHSExpr->getType()->isVoidType()) S.Diag(LHSExpr->getBeginLoc(), diag::ext_typecheck_cond_one_void) << LHSExpr->getSourceRange(); LHS = S.ImpCastExprToType(LHS.get(), S.Context.VoidTy, CK_ToVoid); RHS = S.ImpCastExprToType(RHS.get(), S.Context.VoidTy, CK_ToVoid); return S.Context.VoidTy; } /// Return false if the NullExpr can be promoted to PointerTy, /// true otherwise. 
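///
/// For example (illustrative), in
///   char *p = ...;
///   ... = cond ? p : 0;
/// the null pointer constant 0 is promoted to 'char *' so both arms of the
/// conditional end up with the same pointer type (C99 6.5.15p6).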
static bool checkConditionalNullPointer(Sema &S, ExprResult &NullExpr, QualType PointerTy) { if ((!PointerTy->isAnyPointerType() && !PointerTy->isBlockPointerType()) || !NullExpr.get()->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) return true; NullExpr = S.ImpCastExprToType(NullExpr.get(), PointerTy, CK_NullToPointer); return false; } /// Checks compatibility between two pointers and return the resulting /// type. static QualType checkConditionalPointerCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (S.Context.hasSameType(LHSTy, RHSTy)) { // Two identical pointers types are always compatible. return LHSTy; } QualType lhptee, rhptee; // Get the pointee types. bool IsBlockPointer = false; if (const BlockPointerType *LHSBTy = LHSTy->getAs()) { lhptee = LHSBTy->getPointeeType(); rhptee = RHSTy->castAs()->getPointeeType(); IsBlockPointer = true; } else { lhptee = LHSTy->castAs()->getPointeeType(); rhptee = RHSTy->castAs()->getPointeeType(); } // C99 6.5.15p6: If both operands are pointers to compatible types or to // differently qualified versions of compatible types, the result type is // a pointer to an appropriately qualified version of the composite // type. // Only CVR-qualifiers exist in the standard, and the differently-qualified // clause doesn't make sense for our extensions. E.g. address space 2 should // be incompatible with address space 3: they may live on different devices or // anything. Qualifiers lhQual = lhptee.getQualifiers(); Qualifiers rhQual = rhptee.getQualifiers(); LangAS ResultAddrSpace = LangAS::Default; LangAS LAddrSpace = lhQual.getAddressSpace(); LangAS RAddrSpace = rhQual.getAddressSpace(); // OpenCL v1.1 s6.5 - Conversion between pointers to distinct address // spaces is disallowed. if (lhQual.isAddressSpaceSupersetOf(rhQual)) ResultAddrSpace = LAddrSpace; else if (rhQual.isAddressSpaceSupersetOf(lhQual)) ResultAddrSpace = RAddrSpace; else { S.Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSTy << RHSTy << 2 << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } unsigned MergedCVRQual = lhQual.getCVRQualifiers() | rhQual.getCVRQualifiers(); auto LHSCastKind = CK_BitCast, RHSCastKind = CK_BitCast; lhQual.removeCVRQualifiers(); rhQual.removeCVRQualifiers(); // OpenCL v2.0 specification doesn't extend compatibility of type qualifiers // (C99 6.7.3) for address spaces. We assume that the check should behave in // the same manner as it's defined for CVR qualifiers, so for OpenCL two // qual types are compatible iff // * corresponded types are compatible // * CVR qualifiers are equal // * address spaces are equal // Thus for conditional operator we merge CVR and address space unqualified // pointees and if there is a composite type we return a pointer to it with // merged qualifiers. LHSCastKind = LAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion; RHSCastKind = RAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion; lhQual.removeAddressSpace(); rhQual.removeAddressSpace(); lhptee = S.Context.getQualifiedType(lhptee.getUnqualifiedType(), lhQual); rhptee = S.Context.getQualifiedType(rhptee.getUnqualifiedType(), rhQual); QualType CompositeTy = S.Context.mergeTypes(lhptee, rhptee); if (CompositeTy.isNull()) { // In this situation, we assume void* type. 
No especially good // reason, but this is what gcc does, and we do have to pick // to get a consistent AST. QualType incompatTy; incompatTy = S.Context.getPointerType( S.Context.getAddrSpaceQualType(S.Context.VoidTy, ResultAddrSpace)); LHS = S.ImpCastExprToType(LHS.get(), incompatTy, LHSCastKind); RHS = S.ImpCastExprToType(RHS.get(), incompatTy, RHSCastKind); // FIXME: For OpenCL the warning emission and cast to void* leaves a room // for casts between types with incompatible address space qualifiers. // For the following code the compiler produces casts between global and // local address spaces of the corresponded innermost pointees: // local int *global *a; // global int *global *b; // a = (0 ? a : b); // see C99 6.5.16.1.p1. S.Diag(Loc, diag::ext_typecheck_cond_incompatible_pointers) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return incompatTy; } // The pointer types are compatible. // In case of OpenCL ResultTy should have the address space qualifier // which is a superset of address spaces of both the 2nd and the 3rd // operands of the conditional operator. QualType ResultTy = [&, ResultAddrSpace]() { if (S.getLangOpts().OpenCL) { Qualifiers CompositeQuals = CompositeTy.getQualifiers(); CompositeQuals.setAddressSpace(ResultAddrSpace); return S.Context .getQualifiedType(CompositeTy.getUnqualifiedType(), CompositeQuals) .withCVRQualifiers(MergedCVRQual); } return CompositeTy.withCVRQualifiers(MergedCVRQual); }(); if (IsBlockPointer) ResultTy = S.Context.getBlockPointerType(ResultTy); else ResultTy = S.Context.getPointerType(ResultTy); LHS = S.ImpCastExprToType(LHS.get(), ResultTy, LHSCastKind); RHS = S.ImpCastExprToType(RHS.get(), ResultTy, RHSCastKind); return ResultTy; } /// Return the resulting type when the operands are both block pointers. static QualType checkConditionalBlockPointerCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (!LHSTy->isBlockPointerType() || !RHSTy->isBlockPointerType()) { if (LHSTy->isVoidPointerType() || RHSTy->isVoidPointerType()) { QualType destType = S.Context.getPointerType(S.Context.VoidTy); LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } S.Diag(Loc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // We have 2 block pointer types. return checkConditionalPointerCompatibility(S, LHS, RHS, Loc); } /// Return the resulting type when the operands are both pointers. static QualType checkConditionalObjectPointersCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { // get the pointer types QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // get the "pointed to" types QualType lhptee = LHSTy->castAs()->getPointeeType(); QualType rhptee = RHSTy->castAs()->getPointeeType(); // ignore qualifiers on void (C99 6.5.15p3, clause 6) if (lhptee->isVoidType() && rhptee->isIncompleteOrObjectType()) { // Figure out necessary qualifiers (C99 6.5.15p6) QualType destPointee = S.Context.getQualifiedType(lhptee, rhptee.getQualifiers()); QualType destType = S.Context.getPointerType(destPointee); // Add qualifiers if necessary. LHS = S.ImpCastExprToType(LHS.get(), destType, CK_NoOp); // Promote to void*. 
RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } if (rhptee->isVoidType() && lhptee->isIncompleteOrObjectType()) { QualType destPointee = S.Context.getQualifiedType(rhptee, lhptee.getQualifiers()); QualType destType = S.Context.getPointerType(destPointee); // Add qualifiers if necessary. RHS = S.ImpCastExprToType(RHS.get(), destType, CK_NoOp); // Promote to void*. LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast); return destType; } return checkConditionalPointerCompatibility(S, LHS, RHS, Loc); } /// Return false if the first expression is not an integer and the second /// expression is not a pointer, true otherwise. static bool checkPointerIntegerMismatch(Sema &S, ExprResult &Int, Expr* PointerExpr, SourceLocation Loc, bool IsIntFirstExpr) { if (!PointerExpr->getType()->isPointerType() || !Int.get()->getType()->isIntegerType()) return false; Expr *Expr1 = IsIntFirstExpr ? Int.get() : PointerExpr; Expr *Expr2 = IsIntFirstExpr ? PointerExpr : Int.get(); S.Diag(Loc, diag::ext_typecheck_cond_pointer_integer_mismatch) << Expr1->getType() << Expr2->getType() << Expr1->getSourceRange() << Expr2->getSourceRange(); Int = S.ImpCastExprToType(Int.get(), PointerExpr->getType(), CK_IntegralToPointer); return true; } /// Simple conversion between integer and floating point types. /// /// Used when handling the OpenCL conditional operator where the /// condition is a vector while the other operands are scalar. /// /// OpenCL v1.1 s6.3.i and s6.11.6 together require that the scalar /// types are either integer or floating type. Between the two /// operands, the type with the higher rank is defined as the "result /// type". The other operand needs to be promoted to the same type. No /// other type promotion is allowed. We cannot use /// UsualArithmeticConversions() for this purpose, since it always /// promotes promotable types. static QualType OpenCLArithmeticConversions(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { LHS = S.DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = S.Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = S.Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); if (!LHSType->isIntegerType() && !LHSType->isRealFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float) << LHSType << LHS.get()->getSourceRange(); return QualType(); } if (!RHSType->isIntegerType() && !RHSType->isRealFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float) << RHSType << RHS.get()->getSourceRange(); return QualType(); } // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // Now handle "real" floating types (i.e. float, double, long double). if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) return handleFloatConversion(S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false); // Finally, we have two differing integer types. return handleIntegerConversion (S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false); } /// Convert scalar operands to a vector that matches the /// condition in length. /// /// Used when handling the OpenCL conditional operator where the /// condition is a vector while the other operands are scalar. 
/// /// We first compute the "result type" for the scalar operands /// according to OpenCL v1.1 s6.3.i. Both operands are then converted /// into a vector of that type where the length matches the condition /// vector type. s6.11.6 requires that the element types of the result /// and the condition must have the same number of bits. static QualType OpenCLConvertScalarsToVectors(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType CondTy, SourceLocation QuestionLoc) { QualType ResTy = OpenCLArithmeticConversions(S, LHS, RHS, QuestionLoc); if (ResTy.isNull()) return QualType(); const VectorType *CV = CondTy->getAs(); assert(CV); // Determine the vector result type unsigned NumElements = CV->getNumElements(); QualType VectorTy = S.Context.getExtVectorType(ResTy, NumElements); // Ensure that all types have the same number of bits if (S.Context.getTypeSize(CV->getElementType()) != S.Context.getTypeSize(ResTy)) { // Since VectorTy is created internally, it does not pretty print // with an OpenCL name. Instead, we just print a description. std::string EleTyName = ResTy.getUnqualifiedType().getAsString(); SmallString<64> Str; llvm::raw_svector_ostream OS(Str); OS << "(vector of " << NumElements << " '" << EleTyName << "' values)"; S.Diag(QuestionLoc, diag::err_conditional_vector_element_size) << CondTy << OS.str(); return QualType(); } // Convert operands to the vector result type LHS = S.ImpCastExprToType(LHS.get(), VectorTy, CK_VectorSplat); RHS = S.ImpCastExprToType(RHS.get(), VectorTy, CK_VectorSplat); return VectorTy; } /// Return false if this is a valid OpenCL condition vector static bool checkOpenCLConditionVector(Sema &S, Expr *Cond, SourceLocation QuestionLoc) { // OpenCL v1.1 s6.11.6 says the elements of the vector must be of // integral type. const VectorType *CondTy = Cond->getType()->getAs(); assert(CondTy); QualType EleTy = CondTy->getElementType(); if (EleTy->isIntegerType()) return false; S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat) << Cond->getType() << Cond->getSourceRange(); return true; } /// Return false if the vector condition type and the vector /// result type are compatible. /// /// OpenCL v1.1 s6.11.6 requires that both vector types have the same /// number of elements, and their element types have the same number /// of bits. static bool checkVectorResult(Sema &S, QualType CondTy, QualType VecResTy, SourceLocation QuestionLoc) { const VectorType *CV = CondTy->getAs(); const VectorType *RV = VecResTy->getAs(); assert(CV && RV); if (CV->getNumElements() != RV->getNumElements()) { S.Diag(QuestionLoc, diag::err_conditional_vector_size) << CondTy << VecResTy; return true; } QualType CVE = CV->getElementType(); QualType RVE = RV->getElementType(); if (S.Context.getTypeSize(CVE) != S.Context.getTypeSize(RVE)) { S.Diag(QuestionLoc, diag::err_conditional_vector_element_size) << CondTy << VecResTy; return true; } return false; } /// Return the resulting type for the conditional operator in /// OpenCL (aka "ternary selection operator", OpenCL v1.1 /// s6.3.i) when the condition is a vector type. static QualType OpenCLCheckVectorConditional(Sema &S, ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { Cond = S.DefaultFunctionArrayLvalueConversion(Cond.get()); if (Cond.isInvalid()) return QualType(); QualType CondTy = Cond.get()->getType(); if (checkOpenCLConditionVector(S, Cond.get(), QuestionLoc)) return QualType(); // If either operand is a vector then find the vector type of the // result as specified in OpenCL v1.1 s6.3.i. 
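  // For example (sketch), in OpenCL
  //
  //   int4   c = ...;
  //   float4 a = ..., b = ...;
  //   float4 r = c ? a : b;   // element-wise selection driven by c
  //
  // both arms are already vectors, so the usual vector operand checks pick the
  // result type, which is then validated against the condition type below.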
if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { bool IsBoolVecLang = !S.getLangOpts().OpenCL && !S.getLangOpts().OpenCLCPlusPlus; QualType VecResTy = S.CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/ false, /*AllowBothBool*/ true, /*AllowBoolConversions*/ false, /*AllowBooleanOperation*/ IsBoolVecLang, /*ReportInvalid*/ true); if (VecResTy.isNull()) return QualType(); // The result type must match the condition type as specified in // OpenCL v1.1 s6.11.6. if (checkVectorResult(S, CondTy, VecResTy, QuestionLoc)) return QualType(); return VecResTy; } // Both operands are scalar. return OpenCLConvertScalarsToVectors(S, LHS, RHS, CondTy, QuestionLoc); } /// Return true if the Expr is block type static bool checkBlockType(Sema &S, const Expr *E) { if (const CallExpr *CE = dyn_cast(E)) { QualType Ty = CE->getCallee()->getType(); if (Ty->isBlockPointerType()) { S.Diag(E->getExprLoc(), diag::err_opencl_ternary_with_block); return true; } } return false; } /// Note that LHS is not null here, even if this is the gnu "x ?: y" extension. /// In that case, LHS = cond. /// C99 6.5.15 QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, ExprValueKind &VK, ExprObjectKind &OK, SourceLocation QuestionLoc) { ExprResult LHSResult = CheckPlaceholderExpr(LHS.get()); if (!LHSResult.isUsable()) return QualType(); LHS = LHSResult; ExprResult RHSResult = CheckPlaceholderExpr(RHS.get()); if (!RHSResult.isUsable()) return QualType(); RHS = RHSResult; // C++ is sufficiently different to merit its own checker. if (getLangOpts().CPlusPlus) return CXXCheckConditionalOperands(Cond, LHS, RHS, VK, OK, QuestionLoc); VK = VK_PRValue; OK = OK_Ordinary; if (Context.isDependenceAllowed() && (Cond.get()->isTypeDependent() || LHS.get()->isTypeDependent() || RHS.get()->isTypeDependent())) { assert(!getLangOpts().CPlusPlus); assert((Cond.get()->containsErrors() || LHS.get()->containsErrors() || RHS.get()->containsErrors()) && "should only occur in error-recovery path."); return Context.DependentTy; } // The OpenCL operator with a vector condition is sufficiently // different to merit its own checker. if ((getLangOpts().OpenCL && Cond.get()->getType()->isVectorType()) || Cond.get()->getType()->isExtVectorType()) return OpenCLCheckVectorConditional(*this, Cond, LHS, RHS, QuestionLoc); // First, check the condition. Cond = UsualUnaryConversions(Cond.get()); if (Cond.isInvalid()) return QualType(); if (checkCondition(*this, Cond.get(), QuestionLoc)) return QualType(); // Now check the two expressions. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/ false, /*AllowBothBool*/ true, /*AllowBoolConversions*/ false, /*AllowBooleanOperation*/ false, /*ReportInvalid*/ true); QualType ResTy = UsualArithmeticConversions(LHS, RHS, QuestionLoc, ACK_Conditional); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // Diagnose attempts to convert between __ibm128, __float128 and long double // where such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSTy, RHSTy)) { Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // OpenCL v2.0 s6.12.5 - Blocks cannot be used as expressions of the ternary // selection operator (?:). 
if (getLangOpts().OpenCL && ((int)checkBlockType(*this, LHS.get()) | (int)checkBlockType(*this, RHS.get()))) { return QualType(); } // If both operands have arithmetic type, do the usual arithmetic conversions // to find a common type: C99 6.5.15p3,5. if (LHSTy->isArithmeticType() && RHSTy->isArithmeticType()) { // Disallow invalid arithmetic conversions, such as those between bit- // precise integers types of different sizes, or between a bit-precise // integer and another type. if (ResTy.isNull() && (LHSTy->isBitIntType() || RHSTy->isBitIntType())) { Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } LHS = ImpCastExprToType(LHS.get(), ResTy, PrepareScalarCast(LHS, ResTy)); RHS = ImpCastExprToType(RHS.get(), ResTy, PrepareScalarCast(RHS, ResTy)); return ResTy; } // And if they're both bfloat (which isn't arithmetic), that's fine too. if (LHSTy->isBFloat16Type() && RHSTy->isBFloat16Type()) { return LHSTy; } // If both operands are the same structure or union type, the result is that // type. if (const RecordType *LHSRT = LHSTy->getAs()) { // C99 6.5.15p3 if (const RecordType *RHSRT = RHSTy->getAs()) if (LHSRT->getDecl() == RHSRT->getDecl()) // "If both the operands have structure or union type, the result has // that type." This implies that CV qualifiers are dropped. return LHSTy.getUnqualifiedType(); // FIXME: Type of conditional expression must be complete in C mode. } // C99 6.5.15p5: "If both operands have void type, the result has void type." // The following || allows only one side to be void (a GCC-ism). if (LHSTy->isVoidType() || RHSTy->isVoidType()) { return checkConditionalVoidType(*this, LHS, RHS); } // C99 6.5.15p6 - "if one operand is a null pointer constant, the result has // the type of the other operand." if (!checkConditionalNullPointer(*this, RHS, LHSTy)) return LHSTy; if (!checkConditionalNullPointer(*this, LHS, RHSTy)) return RHSTy; // All objective-c pointer type analysis is done here. QualType compositeType = FindCompositeObjCPointerType(LHS, RHS, QuestionLoc); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (!compositeType.isNull()) return compositeType; // Handle block pointer types. if (LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) return checkConditionalBlockPointerCompatibility(*this, LHS, RHS, QuestionLoc); // Check constraints for C object pointers types (C99 6.5.15p3,6). if (LHSTy->isPointerType() && RHSTy->isPointerType()) return checkConditionalObjectPointersCompatibility(*this, LHS, RHS, QuestionLoc); // GCC compatibility: soften pointer/integer mismatch. Note that // null pointers have been filtered out by this point. if (checkPointerIntegerMismatch(*this, LHS, RHS.get(), QuestionLoc, /*IsIntFirstExpr=*/true)) return RHSTy; if (checkPointerIntegerMismatch(*this, RHS, LHS.get(), QuestionLoc, /*IsIntFirstExpr=*/false)) return LHSTy; // Allow ?: operations in which both operands have the same // built-in sizeless type. if (LHSTy->isSizelessBuiltinType() && Context.hasSameType(LHSTy, RHSTy)) return LHSTy; // Emit a better diagnostic if one of the expressions is a null pointer // constant and the other is not a pointer type. In this case, the user most // likely forgot to take the address of the other expression. if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc)) return QualType(); // Otherwise, the operands are not compatible. 
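  // e.g. (illustrative) an expression like
  //
  //   struct S s; int i;
  //   ... = cond ? s : i;   // no common type exists in C
  //
  // falls through all of the cases above and is diagnosed here.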
Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } /// FindCompositeObjCPointerType - Helper method to find composite type of /// two objective-c pointer types of the two input expressions. QualType Sema::FindCompositeObjCPointerType(ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // Handle things like Class and struct objc_class*. Here we case the result // to the pseudo-builtin, because that will be implicitly cast back to the // redefinition type if an attempt is made to access its fields. if (LHSTy->isObjCClassType() && (Context.hasSameType(RHSTy, Context.getObjCClassRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast); return LHSTy; } if (RHSTy->isObjCClassType() && (Context.hasSameType(LHSTy, Context.getObjCClassRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast); return RHSTy; } // And the same for struct objc_object* / id if (LHSTy->isObjCIdType() && (Context.hasSameType(RHSTy, Context.getObjCIdRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast); return LHSTy; } if (RHSTy->isObjCIdType() && (Context.hasSameType(LHSTy, Context.getObjCIdRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast); return RHSTy; } // And the same for struct objc_selector* / SEL if (Context.isObjCSelType(LHSTy) && (Context.hasSameType(RHSTy, Context.getObjCSelRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_BitCast); return LHSTy; } if (Context.isObjCSelType(RHSTy) && (Context.hasSameType(LHSTy, Context.getObjCSelRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_BitCast); return RHSTy; } // Check constraints for Objective-C object pointers types. if (LHSTy->isObjCObjectPointerType() && RHSTy->isObjCObjectPointerType()) { if (Context.getCanonicalType(LHSTy) == Context.getCanonicalType(RHSTy)) { // Two identical object pointer types are always compatible. return LHSTy; } const ObjCObjectPointerType *LHSOPT = LHSTy->castAs(); const ObjCObjectPointerType *RHSOPT = RHSTy->castAs(); QualType compositeType = LHSTy; // If both operands are interfaces and either operand can be // assigned to the other, use that type as the composite // type. This allows // xxx ? (A*) a : (B*) b // where B is a subclass of A. // // Additionally, as for assignment, if either type is 'id' // allow silent coercion. Finally, if the types are // incompatible then make sure to use 'id' as the composite // type so the result is acceptable for sending messages to. // FIXME: Consider unifying with 'areComparableObjCPointerTypes'. // It could return the composite type. if (!(compositeType = Context.areCommonBaseCompatible(LHSOPT, RHSOPT)).isNull()) { // Nothing more to do. } else if (Context.canAssignObjCInterfaces(LHSOPT, RHSOPT)) { compositeType = RHSOPT->isObjCBuiltinType() ? RHSTy : LHSTy; } else if (Context.canAssignObjCInterfaces(RHSOPT, LHSOPT)) { compositeType = LHSOPT->isObjCBuiltinType() ? LHSTy : RHSTy; } else if ((LHSOPT->isObjCQualifiedIdType() || RHSOPT->isObjCQualifiedIdType()) && Context.ObjCQualifiedIdTypesAreCompatible(LHSOPT, RHSOPT, true)) { // Need to handle "id" explicitly. // GCC allows qualified id and any Objective-C type to devolve to // id. 
Currently localizing to here until clear this should be // part of ObjCQualifiedIdTypesAreCompatible. compositeType = Context.getObjCIdType(); } else if (LHSTy->isObjCIdType() || RHSTy->isObjCIdType()) { compositeType = Context.getObjCIdType(); } else { Diag(QuestionLoc, diag::ext_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); QualType incompatTy = Context.getObjCIdType(); LHS = ImpCastExprToType(LHS.get(), incompatTy, CK_BitCast); RHS = ImpCastExprToType(RHS.get(), incompatTy, CK_BitCast); return incompatTy; } // The object pointer types are compatible. LHS = ImpCastExprToType(LHS.get(), compositeType, CK_BitCast); RHS = ImpCastExprToType(RHS.get(), compositeType, CK_BitCast); return compositeType; } // Check Objective-C object pointer types and 'void *' if (LHSTy->isVoidPointerType() && RHSTy->isObjCObjectPointerType()) { if (getLangOpts().ObjCAutoRefCount) { // ARC forbids the implicit conversion of object pointers to 'void *', // so these types are not compatible. Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = RHS = true; return QualType(); } QualType lhptee = LHSTy->castAs()->getPointeeType(); QualType rhptee = RHSTy->castAs()->getPointeeType(); QualType destPointee = Context.getQualifiedType(lhptee, rhptee.getQualifiers()); QualType destType = Context.getPointerType(destPointee); // Add qualifiers if necessary. LHS = ImpCastExprToType(LHS.get(), destType, CK_NoOp); // Promote to void*. RHS = ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } if (LHSTy->isObjCObjectPointerType() && RHSTy->isVoidPointerType()) { if (getLangOpts().ObjCAutoRefCount) { // ARC forbids the implicit conversion of object pointers to 'void *', // so these types are not compatible. Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = RHS = true; return QualType(); } QualType lhptee = LHSTy->castAs()->getPointeeType(); QualType rhptee = RHSTy->castAs()->getPointeeType(); QualType destPointee = Context.getQualifiedType(rhptee, lhptee.getQualifiers()); QualType destType = Context.getPointerType(destPointee); // Add qualifiers if necessary. RHS = ImpCastExprToType(RHS.get(), destType, CK_NoOp); // Promote to void*. LHS = ImpCastExprToType(LHS.get(), destType, CK_BitCast); return destType; } return QualType(); } /// SuggestParentheses - Emit a note with a fixit hint that wraps /// ParenRange in parentheses. static void SuggestParentheses(Sema &Self, SourceLocation Loc, const PartialDiagnostic &Note, SourceRange ParenRange) { SourceLocation EndLoc = Self.getLocForEndOfToken(ParenRange.getEnd()); if (ParenRange.getBegin().isFileID() && ParenRange.getEnd().isFileID() && EndLoc.isValid()) { Self.Diag(Loc, Note) << FixItHint::CreateInsertion(ParenRange.getBegin(), "(") << FixItHint::CreateInsertion(EndLoc, ")"); } else { // We can't display the parentheses, so just show the bare note. Self.Diag(Loc, Note) << ParenRange; } } static bool IsArithmeticOp(BinaryOperatorKind Opc) { return BinaryOperator::isAdditiveOp(Opc) || BinaryOperator::isMultiplicativeOp(Opc) || BinaryOperator::isShiftOp(Opc) || Opc == BO_And || Opc == BO_Or; // This only checks for bitwise-or and bitwise-and, but not bitwise-xor and // not any of the logical operators. Bitwise-xor is commonly used as a // logical-xor because there is no logical-xor operator. 
The logical // operators, including uses of xor, have a high false positive rate for // precedence warnings. } /// IsArithmeticBinaryExpr - Returns true if E is an arithmetic binary /// expression, either using a built-in or overloaded operator, /// and sets *OpCode to the opcode and *RHSExprs to the right-hand side /// expression. static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode, Expr **RHSExprs) { // Don't strip parenthesis: we should not warn if E is in parenthesis. E = E->IgnoreImpCasts(); E = E->IgnoreConversionOperatorSingleStep(); E = E->IgnoreImpCasts(); if (auto *MTE = dyn_cast(E)) { E = MTE->getSubExpr(); E = E->IgnoreImpCasts(); } // Built-in binary operator. if (BinaryOperator *OP = dyn_cast(E)) { if (IsArithmeticOp(OP->getOpcode())) { *Opcode = OP->getOpcode(); *RHSExprs = OP->getRHS(); return true; } } // Overloaded operator. if (CXXOperatorCallExpr *Call = dyn_cast(E)) { if (Call->getNumArgs() != 2) return false; // Make sure this is really a binary operator that is safe to pass into // BinaryOperator::getOverloadedOpcode(), e.g. it's not a subscript op. OverloadedOperatorKind OO = Call->getOperator(); if (OO < OO_Plus || OO > OO_Arrow || OO == OO_PlusPlus || OO == OO_MinusMinus) return false; BinaryOperatorKind OpKind = BinaryOperator::getOverloadedOpcode(OO); if (IsArithmeticOp(OpKind)) { *Opcode = OpKind; *RHSExprs = Call->getArg(1); return true; } } return false; } /// ExprLooksBoolean - Returns true if E looks boolean, i.e. it has boolean type /// or is a logical expression such as (x==y) which has int type, but is /// commonly interpreted as boolean. static bool ExprLooksBoolean(Expr *E) { E = E->IgnoreParenImpCasts(); if (E->getType()->isBooleanType()) return true; if (BinaryOperator *OP = dyn_cast(E)) return OP->isComparisonOp() || OP->isLogicalOp(); if (UnaryOperator *OP = dyn_cast(E)) return OP->getOpcode() == UO_LNot; if (E->getType()->isPointerType()) return true; // FIXME: What about overloaded operator calls returning "unspecified boolean // type"s (commonly pointer-to-members)? return false; } /// DiagnoseConditionalPrecedence - Emit a warning when a conditional operator /// and binary operator are mixed in a way that suggests the programmer assumed /// the conditional operator has higher precedence, for example: /// "int x = a + someBinaryCondition ? 1 : 2". static void DiagnoseConditionalPrecedence(Sema &Self, SourceLocation OpLoc, Expr *Condition, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperatorKind CondOpcode; Expr *CondRHS; if (!IsArithmeticBinaryExpr(Condition, &CondOpcode, &CondRHS)) return; if (!ExprLooksBoolean(CondRHS)) return; // The condition is an arithmetic binary expression, with a right- // hand side that looks boolean, so warn. unsigned DiagID = BinaryOperator::isBitwiseOp(CondOpcode) ? diag::warn_precedence_bitwise_conditional : diag::warn_precedence_conditional; Self.Diag(OpLoc, DiagID) << Condition->getSourceRange() << BinaryOperator::getOpcodeStr(CondOpcode); SuggestParentheses( Self, OpLoc, Self.PDiag(diag::note_precedence_silence) << BinaryOperator::getOpcodeStr(CondOpcode), SourceRange(Condition->getBeginLoc(), Condition->getEndLoc())); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_conditional_first), SourceRange(CondRHS->getBeginLoc(), RHSExpr->getEndLoc())); } /// Compute the nullability of a conditional expression. 
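///
/// For instance (illustrative), given
///   int * _Nonnull  p;
///   int * _Nullable q;
/// the type of 'cond ? p : q' merges to a _Nullable pointer, while for the
/// binary form 'p ?: q' the _Nonnull left-hand side makes the result _Nonnull.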
static QualType computeConditionalNullability(QualType ResTy, bool IsBin, QualType LHSTy, QualType RHSTy, ASTContext &Ctx) { if (!ResTy->isAnyPointerType()) return ResTy; auto GetNullability = [&Ctx](QualType Ty) { Optional Kind = Ty->getNullability(Ctx); if (Kind) { // For our purposes, treat _Nullable_result as _Nullable. if (*Kind == NullabilityKind::NullableResult) return NullabilityKind::Nullable; return *Kind; } return NullabilityKind::Unspecified; }; auto LHSKind = GetNullability(LHSTy), RHSKind = GetNullability(RHSTy); NullabilityKind MergedKind; // Compute nullability of a binary conditional expression. if (IsBin) { if (LHSKind == NullabilityKind::NonNull) MergedKind = NullabilityKind::NonNull; else MergedKind = RHSKind; // Compute nullability of a normal conditional expression. } else { if (LHSKind == NullabilityKind::Nullable || RHSKind == NullabilityKind::Nullable) MergedKind = NullabilityKind::Nullable; else if (LHSKind == NullabilityKind::NonNull) MergedKind = RHSKind; else if (RHSKind == NullabilityKind::NonNull) MergedKind = LHSKind; else MergedKind = NullabilityKind::Unspecified; } // Return if ResTy already has the correct nullability. if (GetNullability(ResTy) == MergedKind) return ResTy; // Strip all nullability from ResTy. while (ResTy->getNullability(Ctx)) ResTy = ResTy.getSingleStepDesugaredType(Ctx); // Create a new AttributedType with the new nullability kind. auto NewAttr = AttributedType::getNullabilityAttrKind(MergedKind); return Ctx.getAttributedType(NewAttr, ResTy, ResTy); } /// ActOnConditionalOp - Parse a ?: operation. Note that 'LHS' may be null /// in the case of a the GNU conditional expr extension. ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc, SourceLocation ColonLoc, Expr *CondExpr, Expr *LHSExpr, Expr *RHSExpr) { if (!Context.isDependenceAllowed()) { // C cannot handle TypoExpr nodes in the condition because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. ExprResult CondResult = CorrectDelayedTyposInExpr(CondExpr); ExprResult LHSResult = CorrectDelayedTyposInExpr(LHSExpr); ExprResult RHSResult = CorrectDelayedTyposInExpr(RHSExpr); if (!CondResult.isUsable()) return ExprError(); if (LHSExpr) { if (!LHSResult.isUsable()) return ExprError(); } if (!RHSResult.isUsable()) return ExprError(); CondExpr = CondResult.get(); LHSExpr = LHSResult.get(); RHSExpr = RHSResult.get(); } // If this is the gnu "x ?: y" extension, analyze the types as though the LHS // was the condition. OpaqueValueExpr *opaqueValue = nullptr; Expr *commonExpr = nullptr; if (!LHSExpr) { commonExpr = CondExpr; // Lower out placeholder types first. This is important so that we don't // try to capture a placeholder. This happens in few cases in C++; such // as Objective-C++'s dictionary subscripting syntax. if (commonExpr->hasPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(commonExpr); if (!result.isUsable()) return ExprError(); commonExpr = result.get(); } // We usually want to apply unary conversions *before* saving, except // in the special case of a C++ l-value conditional. 
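// (Illustrative note, not from the original source:) in C++, 'a ?: b' with
// two 'int' lvalues of the same type is itself an lvalue, so the common
// expression is saved without the usual unary conversions; in C the result
// is always an rvalue, so the conversions are applied before saving.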
if (!(getLangOpts().CPlusPlus && !commonExpr->isTypeDependent() && commonExpr->getValueKind() == RHSExpr->getValueKind() && commonExpr->isGLValue() && commonExpr->isOrdinaryOrBitFieldObject() && RHSExpr->isOrdinaryOrBitFieldObject() && Context.hasSameType(commonExpr->getType(), RHSExpr->getType()))) { ExprResult commonRes = UsualUnaryConversions(commonExpr); if (commonRes.isInvalid()) return ExprError(); commonExpr = commonRes.get(); } // If the common expression is a class or array prvalue, materialize it // so that we can safely refer to it multiple times. if (commonExpr->isPRValue() && (commonExpr->getType()->isRecordType() || commonExpr->getType()->isArrayType())) { ExprResult MatExpr = TemporaryMaterializationConversion(commonExpr); if (MatExpr.isInvalid()) return ExprError(); commonExpr = MatExpr.get(); } opaqueValue = new (Context) OpaqueValueExpr(commonExpr->getExprLoc(), commonExpr->getType(), commonExpr->getValueKind(), commonExpr->getObjectKind(), commonExpr); LHSExpr = CondExpr = opaqueValue; } QualType LHSTy = LHSExpr->getType(), RHSTy = RHSExpr->getType(); ExprValueKind VK = VK_PRValue; ExprObjectKind OK = OK_Ordinary; ExprResult Cond = CondExpr, LHS = LHSExpr, RHS = RHSExpr; QualType result = CheckConditionalOperands(Cond, LHS, RHS, VK, OK, QuestionLoc); if (result.isNull() || Cond.isInvalid() || LHS.isInvalid() || RHS.isInvalid()) return ExprError(); DiagnoseConditionalPrecedence(*this, QuestionLoc, Cond.get(), LHS.get(), RHS.get()); CheckBoolLikeConversion(Cond.get(), QuestionLoc); result = computeConditionalNullability(result, commonExpr, LHSTy, RHSTy, Context); if (!commonExpr) return new (Context) ConditionalOperator(Cond.get(), QuestionLoc, LHS.get(), ColonLoc, RHS.get(), result, VK, OK); return new (Context) BinaryConditionalOperator( commonExpr, opaqueValue, Cond.get(), LHS.get(), RHS.get(), QuestionLoc, ColonLoc, result, VK, OK); } // Check if we have a conversion between incompatible cmse function pointer // types, that is, a conversion between a function pointer with the // cmse_nonsecure_call attribute and one without. static bool IsInvalidCmseNSCallConversion(Sema &S, QualType FromType, QualType ToType) { if (const auto *ToFn = dyn_cast(S.Context.getCanonicalType(ToType))) { if (const auto *FromFn = dyn_cast(S.Context.getCanonicalType(FromType))) { FunctionType::ExtInfo ToEInfo = ToFn->getExtInfo(); FunctionType::ExtInfo FromEInfo = FromFn->getExtInfo(); return ToEInfo.getCmseNSCall() != FromEInfo.getCmseNSCall(); } } return false; } // checkPointerTypesForAssignment - This is a very tricky routine (despite // being closely modeled after the C99 spec:-). The odd characteristic of this // routine is it effectively iqnores the qualifiers on the top level pointee. // This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3]. // FIXME: add a couple examples in this comment. static Sema::AssignConvertType checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS not canonicalized!"); assert(RHSType.isCanonical() && "RHS not canonicalized!"); // get the "pointed to" type (ignoring qualifiers at the top level) const Type *lhptee, *rhptee; Qualifiers lhq, rhq; std::tie(lhptee, lhq) = cast(LHSType)->getPointeeType().split().asPair(); std::tie(rhptee, rhq) = cast(RHSType)->getPointeeType().split().asPair(); Sema::AssignConvertType ConvTy = Sema::Compatible; // C99 6.5.16.1p1: This following citation is common to constraints // 3 & 4 (below). 
...and the type *pointed to* by the left has all the // qualifiers of the type *pointed to* by the right; // As a special case, 'non-__weak A *' -> 'non-__weak const *' is okay. if (lhq.getObjCLifetime() != rhq.getObjCLifetime() && lhq.compatiblyIncludesObjCLifetime(rhq)) { // Ignore lifetime for further calculation. lhq.removeObjCLifetime(); rhq.removeObjCLifetime(); } if (!lhq.compatiblyIncludes(rhq)) { // Treat address-space mismatches as fatal. if (!lhq.isAddressSpaceSupersetOf(rhq)) return Sema::IncompatiblePointerDiscardsQualifiers; // It's okay to add or remove GC or lifetime qualifiers when converting to // and from void*. else if (lhq.withoutObjCGCAttr().withoutObjCLifetime() .compatiblyIncludes( rhq.withoutObjCGCAttr().withoutObjCLifetime()) && (lhptee->isVoidType() || rhptee->isVoidType())) ; // keep old // Treat lifetime mismatches as fatal. else if (lhq.getObjCLifetime() != rhq.getObjCLifetime()) ConvTy = Sema::IncompatiblePointerDiscardsQualifiers; // For GCC/MS compatibility, other qualifier mismatches are treated // as still compatible in C. else ConvTy = Sema::CompatiblePointerDiscardsQualifiers; } // C99 6.5.16.1p1 (constraint 4): If one operand is a pointer to an object or // incomplete type and the other is a pointer to a qualified or unqualified // version of void... if (lhptee->isVoidType()) { if (rhptee->isIncompleteOrObjectType()) return ConvTy; // As an extension, we allow cast to/from void* to function pointer. assert(rhptee->isFunctionType()); return Sema::FunctionVoidPointer; } if (rhptee->isVoidType()) { if (lhptee->isIncompleteOrObjectType()) return ConvTy; // As an extension, we allow cast to/from void* to function pointer. assert(lhptee->isFunctionType()); return Sema::FunctionVoidPointer; } // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or // unqualified versions of compatible types, ... QualType ltrans = QualType(lhptee, 0), rtrans = QualType(rhptee, 0); if (!S.Context.typesAreCompatible(ltrans, rtrans)) { // Check if the pointee types are compatible ignoring the sign. // We explicitly check for char so that we catch "char" vs // "unsigned char" on systems where "char" is unsigned. if (lhptee->isCharType()) ltrans = S.Context.UnsignedCharTy; else if (lhptee->hasSignedIntegerRepresentation()) ltrans = S.Context.getCorrespondingUnsignedType(ltrans); if (rhptee->isCharType()) rtrans = S.Context.UnsignedCharTy; else if (rhptee->hasSignedIntegerRepresentation()) rtrans = S.Context.getCorrespondingUnsignedType(rtrans); if (ltrans == rtrans) { // Types are compatible ignoring the sign. Qualifier incompatibility // takes priority over sign incompatibility because the sign // warning can be disabled. if (ConvTy != Sema::Compatible) return ConvTy; return Sema::IncompatiblePointerSign; } // If we are a multi-level pointer, it's possible that our issue is simply // one of qualification - e.g. char ** -> const char ** is not allowed. If // the eventual target type is the same and the pointers have the same // level of indirection, this must be the issue. if (isa(lhptee) && isa(rhptee)) { do { std::tie(lhptee, lhq) = cast(lhptee)->getPointeeType().split().asPair(); std::tie(rhptee, rhq) = cast(rhptee)->getPointeeType().split().asPair(); // Inconsistent address spaces at this point is invalid, even if the // address spaces would be compatible. 
// FIXME: This doesn't catch address space mismatches for pointers of // different nesting levels, like: // __local int *** a; // int ** b = a; // It's not clear how to actually determine when such pointers are // invalidly incompatible. if (lhq.getAddressSpace() != rhq.getAddressSpace()) return Sema::IncompatibleNestedPointerAddressSpaceMismatch; } while (isa(lhptee) && isa(rhptee)); if (lhptee == rhptee) return Sema::IncompatibleNestedPointerQualifiers; } // General pointer incompatibility takes priority over qualifiers. if (RHSType->isFunctionPointerType() && LHSType->isFunctionPointerType()) return Sema::IncompatibleFunctionPointer; return Sema::IncompatiblePointer; } if (!S.getLangOpts().CPlusPlus && S.IsFunctionConversion(ltrans, rtrans, ltrans)) return Sema::IncompatibleFunctionPointer; if (IsInvalidCmseNSCallConversion(S, ltrans, rtrans)) return Sema::IncompatibleFunctionPointer; return ConvTy; } /// checkBlockPointerTypesForAssignment - This routine determines whether two /// block pointer types are compatible or whether a block and normal pointer /// are compatible. It is more restrict than comparing two function pointer // types. static Sema::AssignConvertType checkBlockPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS not canonicalized!"); assert(RHSType.isCanonical() && "RHS not canonicalized!"); QualType lhptee, rhptee; // get the "pointed to" type (ignoring qualifiers at the top level) lhptee = cast(LHSType)->getPointeeType(); rhptee = cast(RHSType)->getPointeeType(); // In C++, the types have to match exactly. if (S.getLangOpts().CPlusPlus) return Sema::IncompatibleBlockPointer; Sema::AssignConvertType ConvTy = Sema::Compatible; // For blocks we enforce that qualifiers are identical. Qualifiers LQuals = lhptee.getLocalQualifiers(); Qualifiers RQuals = rhptee.getLocalQualifiers(); if (S.getLangOpts().OpenCL) { LQuals.removeAddressSpace(); RQuals.removeAddressSpace(); } if (LQuals != RQuals) ConvTy = Sema::CompatiblePointerDiscardsQualifiers; // FIXME: OpenCL doesn't define the exact compile time semantics for a block // assignment. // The current behavior is similar to C++ lambdas. A block might be // assigned to a variable iff its return type and parameters are compatible // (C99 6.2.7) with the corresponding return type and parameters of the LHS of // an assignment. Presumably it should behave in way that a function pointer // assignment does in C, so for each parameter and return type: // * CVR and address space of LHS should be a superset of CVR and address // space of RHS. // * unqualified types should be compatible. if (S.getLangOpts().OpenCL) { if (!S.Context.typesAreBlockPointerCompatible( S.Context.getQualifiedType(LHSType.getUnqualifiedType(), LQuals), S.Context.getQualifiedType(RHSType.getUnqualifiedType(), RQuals))) return Sema::IncompatibleBlockPointer; } else if (!S.Context.typesAreBlockPointerCompatible(LHSType, RHSType)) return Sema::IncompatibleBlockPointer; return ConvTy; } /// checkObjCPointerTypesForAssignment - Compares two objective-c pointer types /// for assignment compatibility. static Sema::AssignConvertType checkObjCPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS was not canonicalized!"); assert(RHSType.isCanonical() && "RHS was not canonicalized!"); if (LHSType->isObjCBuiltinType()) { // Class is not compatible with ObjC object pointers. 
if (LHSType->isObjCClassType() && !RHSType->isObjCBuiltinType() && !RHSType->isObjCQualifiedClassType()) return Sema::IncompatiblePointer; return Sema::Compatible; } if (RHSType->isObjCBuiltinType()) { if (RHSType->isObjCClassType() && !LHSType->isObjCBuiltinType() && !LHSType->isObjCQualifiedClassType()) return Sema::IncompatiblePointer; return Sema::Compatible; } QualType lhptee = LHSType->castAs<ObjCObjectPointerType>()->getPointeeType(); QualType rhptee = RHSType->castAs<ObjCObjectPointerType>()->getPointeeType(); if (!lhptee.isAtLeastAsQualifiedAs(rhptee) && // make an exception for id<P>
!LHSType->isObjCQualifiedIdType()) return Sema::CompatiblePointerDiscardsQualifiers; if (S.Context.typesAreCompatible(LHSType, RHSType)) return Sema::Compatible; if (LHSType->isObjCQualifiedIdType() || RHSType->isObjCQualifiedIdType()) return Sema::IncompatibleObjCQualifiedId; return Sema::IncompatiblePointer; } Sema::AssignConvertType Sema::CheckAssignmentConstraints(SourceLocation Loc, QualType LHSType, QualType RHSType) { // Fake up an opaque expression. We don't actually care about what // cast operations are required, so if CheckAssignmentConstraints // adds casts to this they'll be wasted, but fortunately that doesn't // usually happen on valid code. OpaqueValueExpr RHSExpr(Loc, RHSType, VK_PRValue); ExprResult RHSPtr = &RHSExpr; CastKind K; return CheckAssignmentConstraints(LHSType, RHSPtr, K, /*ConvertRHS=*/false); } /// This helper function returns true if QT is a vector type that has element /// type ElementType. static bool isVector(QualType QT, QualType ElementType) { if (const VectorType *VT = QT->getAs()) return VT->getElementType().getCanonicalType() == ElementType; return false; } /// CheckAssignmentConstraints (C99 6.5.16) - This routine currently /// has code to accommodate several GCC extensions when type checking /// pointers. Here are some objectionable examples that GCC considers warnings: /// /// int a, *pint; /// short *pshort; /// struct foo *pfoo; /// /// pint = pshort; // warning: assignment from incompatible pointer type /// a = pint; // warning: assignment makes integer from pointer without a cast /// pint = a; // warning: assignment makes pointer from integer without a cast /// pint = pfoo; // warning: assignment from incompatible pointer type /// /// As a result, the code for dealing with pointers is more complex than the /// C99 spec dictates. /// /// Sets 'Kind' for any result kind except Incompatible. Sema::AssignConvertType Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, CastKind &Kind, bool ConvertRHS) { QualType RHSType = RHS.get()->getType(); QualType OrigLHSType = LHSType; // Get canonical types. We're not formatting these types, just comparing // them. LHSType = Context.getCanonicalType(LHSType).getUnqualifiedType(); RHSType = Context.getCanonicalType(RHSType).getUnqualifiedType(); // Common case: no conversion required. if (LHSType == RHSType) { Kind = CK_NoOp; return Compatible; } // If the LHS has an __auto_type, there are no additional type constraints // to be worried about. if (const auto *AT = dyn_cast(LHSType)) { if (AT->isGNUAutoType()) { Kind = CK_NoOp; return Compatible; } } // If we have an atomic type, try a non-atomic assignment, then just add an // atomic qualification step. if (const AtomicType *AtomicTy = dyn_cast(LHSType)) { Sema::AssignConvertType result = CheckAssignmentConstraints(AtomicTy->getValueType(), RHS, Kind); if (result != Compatible) return result; if (Kind != CK_NoOp && ConvertRHS) RHS = ImpCastExprToType(RHS.get(), AtomicTy->getValueType(), Kind); Kind = CK_NonAtomicToAtomic; return Compatible; } // If the left-hand side is a reference type, then we are in a // (rare!) case where we've allowed the use of references in C, // e.g., as a parameter type in a built-in function. In this case, // just make sure that the type referenced is compatible with the // right-hand side type. The caller is responsible for adjusting // LHSType so that the resulting expression does not have reference // type. 
if (const ReferenceType *LHSTypeRef = LHSType->getAs()) { if (Context.typesAreCompatible(LHSTypeRef->getPointeeType(), RHSType)) { Kind = CK_LValueBitCast; return Compatible; } return Incompatible; } // Allow scalar to ExtVector assignments, and assignments of an ExtVector type // to the same ExtVector type. if (LHSType->isExtVectorType()) { if (RHSType->isExtVectorType()) return Incompatible; if (RHSType->isArithmeticType()) { // CK_VectorSplat does T -> vector T, so first cast to the element type. if (ConvertRHS) RHS = prepareVectorSplat(LHSType, RHS.get()); Kind = CK_VectorSplat; return Compatible; } } // Conversions to or from vector type. if (LHSType->isVectorType() || RHSType->isVectorType()) { if (LHSType->isVectorType() && RHSType->isVectorType()) { // Allow assignments of an AltiVec vector type to an equivalent GCC // vector type and vice versa if (Context.areCompatibleVectorTypes(LHSType, RHSType)) { Kind = CK_BitCast; return Compatible; } // If we are allowing lax vector conversions, and LHS and RHS are both // vectors, the total size only needs to be the same. This is a bitcast; // no bits are changed but the result type is different. if (isLaxVectorConversion(RHSType, LHSType)) { // The default for lax vector conversions with Altivec vectors will // change, so if we are converting between vector types where // at least one is an Altivec vector, emit a warning. if (anyAltivecTypes(RHSType, LHSType) && !areSameVectorElemTypes(RHSType, LHSType)) Diag(RHS.get()->getExprLoc(), diag::warn_deprecated_lax_vec_conv_all) << RHSType << LHSType; Kind = CK_BitCast; return IncompatibleVectors; } } // When the RHS comes from another lax conversion (e.g. binops between // scalars and vectors) the result is canonicalized as a vector. When the // LHS is also a vector, the lax is allowed by the condition above. Handle // the case where LHS is a scalar. if (LHSType->isScalarType()) { const VectorType *VecType = RHSType->getAs(); if (VecType && VecType->getNumElements() == 1 && isLaxVectorConversion(RHSType, LHSType)) { if (VecType->getVectorKind() == VectorType::AltiVecVector) Diag(RHS.get()->getExprLoc(), diag::warn_deprecated_lax_vec_conv_all) << RHSType << LHSType; ExprResult *VecExpr = &RHS; *VecExpr = ImpCastExprToType(VecExpr->get(), LHSType, CK_BitCast); Kind = CK_BitCast; return Compatible; } } // Allow assignments between fixed-length and sizeless SVE vectors. if ((LHSType->isSizelessBuiltinType() && RHSType->isVectorType()) || (LHSType->isVectorType() && RHSType->isSizelessBuiltinType())) if (Context.areCompatibleSveTypes(LHSType, RHSType) || Context.areLaxCompatibleSveTypes(LHSType, RHSType)) { Kind = CK_BitCast; return Compatible; } return Incompatible; } // Diagnose attempts to convert between __ibm128, __float128 and long double // where such conversions currently can't be handled. if (unsupportedTypeConversion(*this, LHSType, RHSType)) return Incompatible; // Disallow assigning a _Complex to a real type in C++ mode since it simply // discards the imaginary part. if (getLangOpts().CPlusPlus && RHSType->getAs() && !LHSType->getAs()) return Incompatible; // Arithmetic conversions. if (LHSType->isArithmeticType() && RHSType->isArithmeticType() && !(getLangOpts().CPlusPlus && LHSType->isEnumeralType())) { if (ConvertRHS) Kind = PrepareScalarCast(RHS, LHSType); return Compatible; } // Conversions to normal pointers. 
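// For exposition (assumed example, not in the original source): in C,
// 'int *pi; char *pc = pi;' reaches checkPointerTypesForAssignment below and
// is accepted with an incompatible-pointer-types warning rather than an
// error.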
if (const PointerType *LHSPointer = dyn_cast(LHSType)) { // U* -> T* if (isa(RHSType)) { LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); LangAS AddrSpaceR = RHSType->getPointeeType().getAddressSpace(); if (AddrSpaceL != AddrSpaceR) Kind = CK_AddressSpaceConversion; else if (Context.hasCvrSimilarType(RHSType, LHSType)) Kind = CK_NoOp; else Kind = CK_BitCast; return checkPointerTypesForAssignment(*this, LHSType, RHSType); } // int -> T* if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null? return IntToPointer; } // C pointers are not compatible with ObjC object pointers, // with two exceptions: if (isa(RHSType)) { // - conversions to void* if (LHSPointer->getPointeeType()->isVoidType()) { Kind = CK_BitCast; return Compatible; } // - conversions from 'Class' to the redefinition type if (RHSType->isObjCClassType() && Context.hasSameType(LHSType, Context.getObjCClassRedefinitionType())) { Kind = CK_BitCast; return Compatible; } Kind = CK_BitCast; return IncompatiblePointer; } // U^ -> void* if (RHSType->getAs()) { if (LHSPointer->getPointeeType()->isVoidType()) { LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); LangAS AddrSpaceR = RHSType->getAs() ->getPointeeType() .getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return Compatible; } } return Incompatible; } // Conversions to block pointers. if (isa(LHSType)) { // U^ -> T^ if (RHSType->isBlockPointerType()) { LangAS AddrSpaceL = LHSType->getAs() ->getPointeeType() .getAddressSpace(); LangAS AddrSpaceR = RHSType->getAs() ->getPointeeType() .getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType); } // int or null -> T^ if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null return IntToBlockPointer; } // id -> T^ if (getLangOpts().ObjC && RHSType->isObjCIdType()) { Kind = CK_AnyPointerToBlockPointerCast; return Compatible; } // void* -> T^ if (const PointerType *RHSPT = RHSType->getAs()) if (RHSPT->getPointeeType()->isVoidType()) { Kind = CK_AnyPointerToBlockPointerCast; return Compatible; } return Incompatible; } // Conversions to Objective-C pointers. if (isa(LHSType)) { // A* -> B* if (RHSType->isObjCObjectPointerType()) { Kind = CK_BitCast; Sema::AssignConvertType result = checkObjCPointerTypesForAssignment(*this, LHSType, RHSType); if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && result == Compatible && !CheckObjCARCUnavailableWeakConversion(OrigLHSType, RHSType)) result = IncompatibleObjCWeakRef; return result; } // int or null -> A* if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null return IntToPointer; } // In general, C pointers are not compatible with ObjC object pointers, // with two exceptions: if (isa(RHSType)) { Kind = CK_CPointerToObjCPointerCast; // - conversions from 'void*' if (RHSType->isVoidPointerType()) { return Compatible; } // - conversions to 'Class' from its redefinition type if (LHSType->isObjCClassType() && Context.hasSameType(RHSType, Context.getObjCClassRedefinitionType())) { return Compatible; } return IncompatiblePointer; } // Only under strict condition T^ is compatible with an Objective-C pointer. 
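// e.g. (illustrative, not from the original source): 'id obj = ^{ };' is
// accepted because a block is an Objective-C object; the block pointer is
// converted with CK_BlockPointerToObjCPointerCast below.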
if (RHSType->isBlockPointerType() && LHSType->isBlockCompatibleObjCPointerType(Context)) { if (ConvertRHS) maybeExtendBlockObject(RHS); Kind = CK_BlockPointerToObjCPointerCast; return Compatible; } return Incompatible; } // Conversions from pointers that are not covered by the above. if (isa(RHSType)) { // T* -> _Bool if (LHSType == Context.BoolTy) { Kind = CK_PointerToBoolean; return Compatible; } // T* -> int if (LHSType->isIntegerType()) { Kind = CK_PointerToIntegral; return PointerToInt; } return Incompatible; } // Conversions from Objective-C pointers that are not covered by the above. if (isa(RHSType)) { // T* -> _Bool if (LHSType == Context.BoolTy) { Kind = CK_PointerToBoolean; return Compatible; } // T* -> int if (LHSType->isIntegerType()) { Kind = CK_PointerToIntegral; return PointerToInt; } return Incompatible; } // struct A -> struct B if (isa(LHSType) && isa(RHSType)) { if (Context.typesAreCompatible(LHSType, RHSType)) { Kind = CK_NoOp; return Compatible; } } if (LHSType->isSamplerT() && RHSType->isIntegerType()) { Kind = CK_IntToOCLSampler; return Compatible; } return Incompatible; } /// Constructs a transparent union from an expression that is /// used to initialize the transparent union. static void ConstructTransparentUnion(Sema &S, ASTContext &C, ExprResult &EResult, QualType UnionType, FieldDecl *Field) { // Build an initializer list that designates the appropriate member // of the transparent union. Expr *E = EResult.get(); InitListExpr *Initializer = new (C) InitListExpr(C, SourceLocation(), E, SourceLocation()); Initializer->setType(UnionType); Initializer->setInitializedFieldInUnion(Field); // Build a compound literal constructing a value of the transparent // union type from this initializer list. TypeSourceInfo *unionTInfo = C.getTrivialTypeSourceInfo(UnionType); EResult = new (C) CompoundLiteralExpr(SourceLocation(), unionTInfo, UnionType, VK_PRValue, Initializer, false); } Sema::AssignConvertType Sema::CheckTransparentUnionArgumentConstraints(QualType ArgType, ExprResult &RHS) { QualType RHSType = RHS.get()->getType(); // If the ArgType is a Union type, we want to handle a potential // transparent_union GCC extension. const RecordType *UT = ArgType->getAsUnionType(); if (!UT || !UT->getDecl()->hasAttr()) return Incompatible; // The field to initialize within the transparent union. RecordDecl *UD = UT->getDecl(); FieldDecl *InitField = nullptr; // It's compatible if the expression matches any of the fields. 
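// Illustrative example (not from the original source): given
//   typedef union { int *ip; float *fp; } U __attribute__((transparent_union));
//   void f(U);
// a call such as 'f(&someInt)' initializes the 'ip' member directly, and
// 'f(NULL)' matches the first pointer field via the null-pointer branch
// below.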
for (auto *it : UD->fields()) { if (it->getType()->isPointerType()) { // If the transparent union contains a pointer type, we allow: // 1) void pointer // 2) null pointer constant if (RHSType->isPointerType()) if (RHSType->castAs()->getPointeeType()->isVoidType()) { RHS = ImpCastExprToType(RHS.get(), it->getType(), CK_BitCast); InitField = it; break; } if (RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { RHS = ImpCastExprToType(RHS.get(), it->getType(), CK_NullToPointer); InitField = it; break; } } CastKind Kind; if (CheckAssignmentConstraints(it->getType(), RHS, Kind) == Compatible) { RHS = ImpCastExprToType(RHS.get(), it->getType(), Kind); InitField = it; break; } } if (!InitField) return Incompatible; ConstructTransparentUnion(*this, Context, RHS, ArgType, InitField); return Compatible; } Sema::AssignConvertType Sema::CheckSingleAssignmentConstraints(QualType LHSType, ExprResult &CallerRHS, bool Diagnose, bool DiagnoseCFAudited, bool ConvertRHS) { // We need to be able to tell the caller whether we diagnosed a problem, if // they ask us to issue diagnostics. assert((ConvertRHS || !Diagnose) && "can't indicate whether we diagnosed"); // If ConvertRHS is false, we want to leave the caller's RHS untouched. Sadly, // we can't avoid *all* modifications at the moment, so we need some somewhere // to put the updated value. ExprResult LocalRHS = CallerRHS; ExprResult &RHS = ConvertRHS ? CallerRHS : LocalRHS; if (const auto *LHSPtrType = LHSType->getAs()) { if (const auto *RHSPtrType = RHS.get()->getType()->getAs()) { if (RHSPtrType->getPointeeType()->hasAttr(attr::NoDeref) && !LHSPtrType->getPointeeType()->hasAttr(attr::NoDeref)) { Diag(RHS.get()->getExprLoc(), diag::warn_noderef_to_dereferenceable_pointer) << RHS.get()->getSourceRange(); } } } if (getLangOpts().CPlusPlus) { if (!LHSType->isRecordType() && !LHSType->isAtomicType()) { // C++ 5.17p3: If the left operand is not of class type, the // expression is implicitly converted (C++ 4) to the // cv-unqualified type of the left operand. QualType RHSType = RHS.get()->getType(); if (Diagnose) { RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), AA_Assigning); } else { ImplicitConversionSequence ICS = TryImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), /*SuppressUserConversions=*/false, AllowedExplicit::None, /*InOverloadResolution=*/false, /*CStyle=*/false, /*AllowObjCWritebackConversion=*/false); if (ICS.isFailure()) return Incompatible; RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), ICS, AA_Assigning); } if (RHS.isInvalid()) return Incompatible; Sema::AssignConvertType result = Compatible; if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && !CheckObjCARCUnavailableWeakConversion(LHSType, RHSType)) result = IncompatibleObjCWeakRef; return result; } // FIXME: Currently, we fall through and treat C++ classes like C // structures. // FIXME: We also fall through for atomics; not sure what should // happen there, though. } else if (RHS.get()->getType() == Context.OverloadTy) { // As a set of extensions to C, we support overloading on functions. These // functions need to be resolved here. DeclAccessPair DAP; if (FunctionDecl *FD = ResolveAddressOfOverloadedFunction( RHS.get(), LHSType, /*Complain=*/false, DAP)) RHS = FixOverloadedFunctionReference(RHS.get(), DAP, FD); else return Incompatible; } // C99 6.5.16.1p1: the left operand is a pointer and the right is // a null pointer constant. 
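// e.g. (illustrative): for 'int *p;' in C, the assignment 'p = 0;' takes
// this path; a null pointer constant converts to any pointer type without
// further diagnostics.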
if ((LHSType->isPointerType() || LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType()) && RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { if (Diagnose || ConvertRHS) { CastKind Kind; CXXCastPath Path; CheckPointerConversion(RHS.get(), LHSType, Kind, Path, /*IgnoreBaseAccess=*/false, Diagnose); if (ConvertRHS) RHS = ImpCastExprToType(RHS.get(), LHSType, Kind, VK_PRValue, &Path); } return Compatible; } // OpenCL queue_t type assignment. if (LHSType->isQueueT() && RHS.get()->isNullPointerConstant( Context, Expr::NPC_ValueDependentIsNull)) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return Compatible; } // This check seems unnatural, however it is necessary to ensure the proper // conversion of functions/arrays. If the conversion were done for all // DeclExpr's (created by ActOnIdExpression), it would mess up the unary // expressions that suppress this implicit conversion (&, sizeof). // // Suppress this for references: C++ 8.5.3p5. if (!LHSType->isReferenceType()) { // FIXME: We potentially allocate here even if ConvertRHS is false. RHS = DefaultFunctionArrayLvalueConversion(RHS.get(), Diagnose); if (RHS.isInvalid()) return Incompatible; } CastKind Kind; Sema::AssignConvertType result = CheckAssignmentConstraints(LHSType, RHS, Kind, ConvertRHS); // C99 6.5.16.1p2: The value of the right operand is converted to the // type of the assignment expression. // CheckAssignmentConstraints allows the left-hand side to be a reference, // so that we can use references in built-in functions even in C. // The getNonReferenceType() call makes sure that the resulting expression // does not have reference type. if (result != Incompatible && RHS.get()->getType() != LHSType) { QualType Ty = LHSType.getNonLValueExprType(Context); Expr *E = RHS.get(); // Check for various Objective-C errors. If we are not reporting // diagnostics and just checking for errors, e.g., during overload // resolution, return Incompatible to indicate the failure. if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && CheckObjCConversion(SourceRange(), Ty, E, CCK_ImplicitConversion, Diagnose, DiagnoseCFAudited) != ACR_okay) { if (!Diagnose) return Incompatible; } if (getLangOpts().ObjC && (CheckObjCBridgeRelatedConversions(E->getBeginLoc(), LHSType, E->getType(), E, Diagnose) || CheckConversionToObjCLiteral(LHSType, E, Diagnose))) { if (!Diagnose) return Incompatible; // Replace the expression with a corrected version and continue so we // can find further errors. RHS = E; return Compatible; } if (ConvertRHS) RHS = ImpCastExprToType(E, Ty, Kind); } return result; } namespace { /// The original operand to an operator, prior to the application of the usual /// arithmetic conversions and converting the arguments of a builtin operator /// candidate. 
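/// For exposition (assumed example, not in the original source): if a class
/// 'S' defines 'operator int()', then for 's + 1' the original operand is
/// 's' itself and Conversion records the 'operator int' that was implicitly
/// applied before the built-in '+' candidate was chosen.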
struct OriginalOperand { explicit OriginalOperand(Expr *Op) : Orig(Op), Conversion(nullptr) { if (auto *MTE = dyn_cast(Op)) Op = MTE->getSubExpr(); if (auto *BTE = dyn_cast(Op)) Op = BTE->getSubExpr(); if (auto *ICE = dyn_cast(Op)) { Orig = ICE->getSubExprAsWritten(); Conversion = ICE->getConversionFunction(); } } QualType getType() const { return Orig->getType(); } Expr *Orig; NamedDecl *Conversion; }; } QualType Sema::InvalidOperands(SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { OriginalOperand OrigLHS(LHS.get()), OrigRHS(RHS.get()); Diag(Loc, diag::err_typecheck_invalid_operands) << OrigLHS.getType() << OrigRHS.getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); // If a user-defined conversion was applied to either of the operands prior // to applying the built-in operator rules, tell the user about it. if (OrigLHS.Conversion) { Diag(OrigLHS.Conversion->getLocation(), diag::note_typecheck_invalid_operands_converted) << 0 << LHS.get()->getType(); } if (OrigRHS.Conversion) { Diag(OrigRHS.Conversion->getLocation(), diag::note_typecheck_invalid_operands_converted) << 1 << RHS.get()->getType(); } return QualType(); } // Diagnose cases where a scalar was implicitly converted to a vector and // diagnose the underlying types. Otherwise, diagnose the error // as invalid vector logical operands for non-C++ cases. QualType Sema::InvalidLogicalVectorOperands(SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { QualType LHSType = LHS.get()->IgnoreImpCasts()->getType(); QualType RHSType = RHS.get()->IgnoreImpCasts()->getType(); bool LHSNatVec = LHSType->isVectorType(); bool RHSNatVec = RHSType->isVectorType(); if (!(LHSNatVec && RHSNatVec)) { Expr *Vector = LHSNatVec ? LHS.get() : RHS.get(); Expr *NonVector = !LHSNatVec ? LHS.get() : RHS.get(); Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict) << 0 << Vector->getType() << NonVector->IgnoreImpCasts()->getType() << Vector->getSourceRange(); return QualType(); } Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict) << 1 << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } /// Try to convert a value of non-vector type to a vector type by converting /// the type to the element type of the vector and then performing a splat. /// If the language is OpenCL, we only use conversions that promote scalar /// rank; for C, Obj-C, and C++ we allow any real scalar conversion except /// for float->int. /// /// OpenCL V2.0 6.2.6.p2: /// An error shall occur if any scalar operand type has greater rank /// than the type of the vector element. /// /// \param scalar - if non-null, actually perform the conversions /// \return true if the operation fails (but without diagnosing the failure) static bool tryVectorConvertAndSplat(Sema &S, ExprResult *scalar, QualType scalarTy, QualType vectorEltTy, QualType vectorTy, unsigned &DiagID) { // The conversion to apply to the scalar before splatting it, // if necessary. 
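// Illustrative example (not from the original source): for an OpenCL
// 'float4 v', the expression 'v + 1' converts the scalar 1 to float
// (CK_IntegralToFloating) and then splats it to all four lanes
// (CK_VectorSplat).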
CastKind scalarCast = CK_NoOp; if (vectorEltTy->isIntegralType(S.Context)) { if (S.getLangOpts().OpenCL && (scalarTy->isRealFloatingType() || (scalarTy->isIntegerType() && S.Context.getIntegerTypeOrder(vectorEltTy, scalarTy) < 0))) { DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type; return true; } if (!scalarTy->isIntegralType(S.Context)) return true; scalarCast = CK_IntegralCast; } else if (vectorEltTy->isRealFloatingType()) { if (scalarTy->isRealFloatingType()) { if (S.getLangOpts().OpenCL && S.Context.getFloatingTypeOrder(vectorEltTy, scalarTy) < 0) { DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type; return true; } scalarCast = CK_FloatingCast; } else if (scalarTy->isIntegralType(S.Context)) scalarCast = CK_IntegralToFloating; else return true; } else { return true; } // Adjust scalar if desired. if (scalar) { if (scalarCast != CK_NoOp) *scalar = S.ImpCastExprToType(scalar->get(), vectorEltTy, scalarCast); *scalar = S.ImpCastExprToType(scalar->get(), vectorTy, CK_VectorSplat); } return false; } /// Convert vector E to a vector with the same number of elements but different /// element type. static ExprResult convertVector(Expr *E, QualType ElementType, Sema &S) { const auto *VecTy = E->getType()->getAs(); assert(VecTy && "Expression E must be a vector"); QualType NewVecTy = VecTy->isExtVectorType() ? S.Context.getExtVectorType(ElementType, VecTy->getNumElements()) : S.Context.getVectorType(ElementType, VecTy->getNumElements(), VecTy->getVectorKind()); // Look through the implicit cast. Return the subexpression if its type is // NewVecTy. if (auto *ICE = dyn_cast(E)) if (ICE->getSubExpr()->getType() == NewVecTy) return ICE->getSubExpr(); auto Cast = ElementType->isIntegerType() ? CK_IntegralCast : CK_FloatingCast; return S.ImpCastExprToType(E, NewVecTy, Cast); } /// Test if a (constant) integer Int can be casted to another integer type /// IntTy without losing precision. static bool canConvertIntToOtherIntTy(Sema &S, ExprResult *Int, QualType OtherIntTy) { QualType IntTy = Int->get()->getType().getUnqualifiedType(); // Reject cases where the value of the Int is unknown as that would // possibly cause truncation, but accept cases where the scalar can be // demoted without loss of precision. Expr::EvalResult EVResult; bool CstInt = Int->get()->EvaluateAsInt(EVResult, S.Context); int Order = S.Context.getIntegerTypeOrder(OtherIntTy, IntTy); bool IntSigned = IntTy->hasSignedIntegerRepresentation(); bool OtherIntSigned = OtherIntTy->hasSignedIntegerRepresentation(); if (CstInt) { // If the scalar is constant and is of a higher order and has more active // bits that the vector element type, reject it. llvm::APSInt Result = EVResult.Val.getInt(); unsigned NumBits = IntSigned ? (Result.isNegative() ? Result.getMinSignedBits() : Result.getActiveBits()) : Result.getActiveBits(); if (Order < 0 && S.Context.getIntWidth(OtherIntTy) < NumBits) return true; // If the signedness of the scalar type and the vector element type // differs and the number of bits is greater than that of the vector // element reject it. return (IntSigned != OtherIntSigned && NumBits > S.Context.getIntWidth(OtherIntTy)); } // Reject cases where the value of the scalar is not constant and it's // order is greater than that of the vector element type. return (Order < 0); } /// Test if a (constant) integer Int can be casted to floating point type /// FloatTy without losing precision. 
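/// Illustrative example (not in the original source): the constant 16777216
/// (2^24) converts to 'float' exactly, but 16777217 (2^24 + 1) does not fit
/// in float's 24-bit significand and would round, so it is rejected.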
static bool canConvertIntTyToFloatTy(Sema &S, ExprResult *Int, QualType FloatTy) { QualType IntTy = Int->get()->getType().getUnqualifiedType(); // Determine if the integer constant can be expressed as a floating point // number of the appropriate type. Expr::EvalResult EVResult; bool CstInt = Int->get()->EvaluateAsInt(EVResult, S.Context); uint64_t Bits = 0; if (CstInt) { // Reject constants that would be truncated if they were converted to // the floating point type. Test by simple to/from conversion. // FIXME: Ideally the conversion to an APFloat and from an APFloat // could be avoided if there was a convertFromAPInt method // which could signal back if implicit truncation occurred. llvm::APSInt Result = EVResult.Val.getInt(); llvm::APFloat Float(S.Context.getFloatTypeSemantics(FloatTy)); Float.convertFromAPInt(Result, IntTy->hasSignedIntegerRepresentation(), llvm::APFloat::rmTowardZero); llvm::APSInt ConvertBack(S.Context.getIntWidth(IntTy), !IntTy->hasSignedIntegerRepresentation()); bool Ignored = false; Float.convertToInteger(ConvertBack, llvm::APFloat::rmNearestTiesToEven, &Ignored); if (Result != ConvertBack) return true; } else { // Reject types that cannot be fully encoded into the mantissa of // the float. Bits = S.Context.getTypeSize(IntTy); unsigned FloatPrec = llvm::APFloat::semanticsPrecision( S.Context.getFloatTypeSemantics(FloatTy)); if (Bits > FloatPrec) return true; } return false; } /// Attempt to convert and splat Scalar into a vector whose types matches /// Vector following GCC conversion rules. The rule is that implicit /// conversion can occur when Scalar can be casted to match Vector's element /// type without causing truncation of Scalar. static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar, ExprResult *Vector) { QualType ScalarTy = Scalar->get()->getType().getUnqualifiedType(); QualType VectorTy = Vector->get()->getType().getUnqualifiedType(); QualType VectorEltTy; if (const auto *VT = VectorTy->getAs()) { assert(!isa(VT) && "ExtVectorTypes should not be handled here!"); VectorEltTy = VT->getElementType(); } else if (VectorTy->isVLSTBuiltinType()) { VectorEltTy = VectorTy->castAs()->getSveEltType(S.getASTContext()); } else { llvm_unreachable("Only Fixed-Length and SVE Vector types are handled here"); } // Reject cases where the vector element type or the scalar element type are // not integral or floating point types. if (!VectorEltTy->isArithmeticType() || !ScalarTy->isArithmeticType()) return true; // The conversion to apply to the scalar before splatting it, // if necessary. CastKind ScalarCast = CK_NoOp; // Accept cases where the vector elements are integers and the scalar is // an integer. // FIXME: Notionally if the scalar was a floating point value with a precise // integral representation, we could cast it to an appropriate integer // type and then perform the rest of the checks here. GCC will perform // this conversion in some cases as determined by the input language. // We should accept it on a language independent basis. 
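// For exposition (assumed example, not from the original source): with
//   typedef char v8c __attribute__((vector_size(8)));
// 'v + 1' is accepted for a v8c 'v' because 1 fits in char, while 'v + 1000'
// is rejected since 1000 would be truncated by the conversion to char.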
if (VectorEltTy->isIntegralType(S.Context) && ScalarTy->isIntegralType(S.Context) && S.Context.getIntegerTypeOrder(VectorEltTy, ScalarTy)) { if (canConvertIntToOtherIntTy(S, Scalar, VectorEltTy)) return true; ScalarCast = CK_IntegralCast; } else if (VectorEltTy->isIntegralType(S.Context) && ScalarTy->isRealFloatingType()) { if (S.Context.getTypeSize(VectorEltTy) == S.Context.getTypeSize(ScalarTy)) ScalarCast = CK_FloatingToIntegral; else return true; } else if (VectorEltTy->isRealFloatingType()) { if (ScalarTy->isRealFloatingType()) { // Reject cases where the scalar type is not a constant and has a higher // Order than the vector element type. llvm::APFloat Result(0.0); // Determine whether this is a constant scalar. In the event that the // value is dependent (and thus cannot be evaluated by the constant // evaluator), skip the evaluation. This will then diagnose once the // expression is instantiated. bool CstScalar = Scalar->get()->isValueDependent() || Scalar->get()->EvaluateAsFloat(Result, S.Context); int Order = S.Context.getFloatingTypeOrder(VectorEltTy, ScalarTy); if (!CstScalar && Order < 0) return true; // If the scalar cannot be safely casted to the vector element type, // reject it. if (CstScalar) { bool Truncated = false; Result.convert(S.Context.getFloatTypeSemantics(VectorEltTy), llvm::APFloat::rmNearestTiesToEven, &Truncated); if (Truncated) return true; } ScalarCast = CK_FloatingCast; } else if (ScalarTy->isIntegralType(S.Context)) { if (canConvertIntTyToFloatTy(S, Scalar, VectorEltTy)) return true; ScalarCast = CK_IntegralToFloating; } else return true; } else if (ScalarTy->isEnumeralType()) return true; // Adjust scalar if desired. if (Scalar) { if (ScalarCast != CK_NoOp) *Scalar = S.ImpCastExprToType(Scalar->get(), VectorEltTy, ScalarCast); *Scalar = S.ImpCastExprToType(Scalar->get(), VectorTy, CK_VectorSplat); } return false; } QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign, bool AllowBothBool, bool AllowBoolConversions, bool AllowBoolOperation, bool ReportInvalid) { if (!IsCompAssign) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = LHS.get()->getType().getUnqualifiedType(); QualType RHSType = RHS.get()->getType().getUnqualifiedType(); const VectorType *LHSVecType = LHSType->getAs(); const VectorType *RHSVecType = RHSType->getAs(); assert(LHSVecType || RHSVecType); if ((LHSVecType && LHSVecType->getElementType()->isBFloat16Type()) || (RHSVecType && RHSVecType->getElementType()->isBFloat16Type())) return ReportInvalid ? InvalidOperands(Loc, LHS, RHS) : QualType(); // AltiVec-style "vector bool op vector bool" combinations are allowed // for some operators but not others. if (!AllowBothBool && LHSVecType && LHSVecType->getVectorKind() == VectorType::AltiVecBool && RHSVecType && RHSVecType->getVectorKind() == VectorType::AltiVecBool) return ReportInvalid ? InvalidOperands(Loc, LHS, RHS) : QualType(); // This operation may not be performed on boolean vectors. if (!AllowBoolOperation && (LHSType->isExtVectorBoolType() || RHSType->isExtVectorBoolType())) return ReportInvalid ? InvalidOperands(Loc, LHS, RHS) : QualType(); // If the vector types are identical, return. 
if (Context.hasSameType(LHSType, RHSType)) return LHSType; // If we have compatible AltiVec and GCC vector types, use the AltiVec type. if (LHSVecType && RHSVecType && Context.areCompatibleVectorTypes(LHSType, RHSType)) { if (isa(LHSVecType)) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return LHSType; } if (!IsCompAssign) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); return RHSType; } // AllowBoolConversions says that bool and non-bool AltiVec vectors // can be mixed, with the result being the non-bool type. The non-bool // operand must have integer element type. if (AllowBoolConversions && LHSVecType && RHSVecType && LHSVecType->getNumElements() == RHSVecType->getNumElements() && (Context.getTypeSize(LHSVecType->getElementType()) == Context.getTypeSize(RHSVecType->getElementType()))) { if (LHSVecType->getVectorKind() == VectorType::AltiVecVector && LHSVecType->getElementType()->isIntegerType() && RHSVecType->getVectorKind() == VectorType::AltiVecBool) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return LHSType; } if (!IsCompAssign && LHSVecType->getVectorKind() == VectorType::AltiVecBool && RHSVecType->getVectorKind() == VectorType::AltiVecVector && RHSVecType->getElementType()->isIntegerType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); return RHSType; } } // Expressions containing fixed-length and sizeless SVE vectors are invalid // since the ambiguity can affect the ABI. auto IsSveConversion = [](QualType FirstType, QualType SecondType) { const VectorType *VecType = SecondType->getAs(); return FirstType->isSizelessBuiltinType() && VecType && (VecType->getVectorKind() == VectorType::SveFixedLengthDataVector || VecType->getVectorKind() == VectorType::SveFixedLengthPredicateVector); }; if (IsSveConversion(LHSType, RHSType) || IsSveConversion(RHSType, LHSType)) { Diag(Loc, diag::err_typecheck_sve_ambiguous) << LHSType << RHSType; return QualType(); } // Expressions containing GNU and SVE (fixed or sizeless) vectors are invalid // since the ambiguity can affect the ABI. auto IsSveGnuConversion = [](QualType FirstType, QualType SecondType) { const VectorType *FirstVecType = FirstType->getAs(); const VectorType *SecondVecType = SecondType->getAs(); if (FirstVecType && SecondVecType) return FirstVecType->getVectorKind() == VectorType::GenericVector && (SecondVecType->getVectorKind() == VectorType::SveFixedLengthDataVector || SecondVecType->getVectorKind() == VectorType::SveFixedLengthPredicateVector); return FirstType->isSizelessBuiltinType() && SecondVecType && SecondVecType->getVectorKind() == VectorType::GenericVector; }; if (IsSveGnuConversion(LHSType, RHSType) || IsSveGnuConversion(RHSType, LHSType)) { Diag(Loc, diag::err_typecheck_sve_gnu_ambiguous) << LHSType << RHSType; return QualType(); } // If there's a vector type and a scalar, try to convert the scalar to // the vector element type and splat. unsigned DiagID = diag::err_typecheck_vector_not_convertable; if (!RHSVecType) { if (isa(LHSVecType)) { if (!tryVectorConvertAndSplat(*this, &RHS, RHSType, LHSVecType->getElementType(), LHSType, DiagID)) return LHSType; } else { if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS)) return LHSType; } } if (!LHSVecType) { if (isa(RHSVecType)) { if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? 
nullptr : &LHS), LHSType, RHSVecType->getElementType(), RHSType, DiagID)) return RHSType; } else { if (LHS.get()->isLValue() || !tryGCCVectorConvertAndSplat(*this, &LHS, &RHS)) return RHSType; } } // FIXME: The code below also handles conversion between vectors and // non-scalars, we should break this down into fine grained specific checks // and emit proper diagnostics. QualType VecType = LHSVecType ? LHSType : RHSType; const VectorType *VT = LHSVecType ? LHSVecType : RHSVecType; QualType OtherType = LHSVecType ? RHSType : LHSType; ExprResult *OtherExpr = LHSVecType ? &RHS : &LHS; if (isLaxVectorConversion(OtherType, VecType)) { if (anyAltivecTypes(RHSType, LHSType) && !areSameVectorElemTypes(RHSType, LHSType)) Diag(Loc, diag::warn_deprecated_lax_vec_conv_all) << RHSType << LHSType; // If we're allowing lax vector conversions, only the total (data) size // needs to be the same. For non compound assignment, if one of the types is // scalar, the result is always the vector type. if (!IsCompAssign) { *OtherExpr = ImpCastExprToType(OtherExpr->get(), VecType, CK_BitCast); return VecType; // In a compound assignment, lhs += rhs, 'lhs' is a lvalue src, forbidding // any implicit cast. Here, the 'rhs' should be implicit casted to 'lhs' // type. Note that this is already done by non-compound assignments in // CheckAssignmentConstraints. If it's a scalar type, only bitcast for // <1 x T> -> T. The result is also a vector type. } else if (OtherType->isExtVectorType() || OtherType->isVectorType() || (OtherType->isScalarType() && VT->getNumElements() == 1)) { ExprResult *RHSExpr = &RHS; *RHSExpr = ImpCastExprToType(RHSExpr->get(), LHSType, CK_BitCast); return VecType; } } // Okay, the expression is invalid. // If there's a non-vector, non-real operand, diagnose that. if ((!RHSVecType && !RHSType->isRealType()) || (!LHSVecType && !LHSType->isRealType())) { Diag(Loc, diag::err_typecheck_vector_not_convertable_non_scalar) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // OpenCL V1.1 6.2.6.p1: // If the operands are of more than one vector type, then an error shall // occur. Implicit conversions between vector types are not permitted, per // section 6.2.1. if (getLangOpts().OpenCL && RHSVecType && isa(RHSVecType) && LHSVecType && isa(LHSVecType)) { Diag(Loc, diag::err_opencl_implicit_vector_conversion) << LHSType << RHSType; return QualType(); } // If there is a vector type that is not a ExtVector and a scalar, we reach // this point if scalar could not be converted to the vector's element type // without truncation. if ((RHSVecType && !isa(RHSVecType)) || (LHSVecType && !isa(LHSVecType))) { QualType Scalar = LHSVecType ? RHSType : LHSType; QualType Vector = LHSVecType ? LHSType : RHSType; unsigned ScalarOrVector = LHSVecType && RHSVecType ? 1 : 0; Diag(Loc, diag::err_typecheck_vector_not_convertable_implict_truncation) << ScalarOrVector << Scalar << Vector; return QualType(); } // Otherwise, use the generic diagnostic. 
Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } QualType Sema::CheckSizelessVectorOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign, ArithConvKind OperationKind) { if (!IsCompAssign) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType().getUnqualifiedType(); QualType RHSType = RHS.get()->getType().getUnqualifiedType(); const BuiltinType *LHSBuiltinTy = LHSType->getAs(); const BuiltinType *RHSBuiltinTy = RHSType->getAs(); unsigned DiagID = diag::err_typecheck_invalid_operands; if ((OperationKind == ACK_Arithmetic) && ((LHSBuiltinTy && LHSBuiltinTy->isSVEBool()) || (RHSBuiltinTy && RHSBuiltinTy->isSVEBool()))) { Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (Context.hasSameType(LHSType, RHSType)) return LHSType; if (LHSType->isVLSTBuiltinType() && !RHSType->isVLSTBuiltinType()) { if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS)) return LHSType; } if (RHSType->isVLSTBuiltinType() && !LHSType->isVLSTBuiltinType()) { if (LHS.get()->isLValue() || !tryGCCVectorConvertAndSplat(*this, &LHS, &RHS)) return RHSType; } if ((!LHSType->isVLSTBuiltinType() && !LHSType->isRealType()) || (!RHSType->isVLSTBuiltinType() && !RHSType->isRealType())) { Diag(Loc, diag::err_typecheck_vector_not_convertable_non_scalar) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (LHSType->isVLSTBuiltinType() && RHSType->isVLSTBuiltinType() && Context.getBuiltinVectorTypeInfo(LHSBuiltinTy).EC != Context.getBuiltinVectorTypeInfo(RHSBuiltinTy).EC) { Diag(Loc, diag::err_typecheck_vector_lengths_not_equal) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (LHSType->isVLSTBuiltinType() || RHSType->isVLSTBuiltinType()) { QualType Scalar = LHSType->isVLSTBuiltinType() ? RHSType : LHSType; QualType Vector = LHSType->isVLSTBuiltinType() ? LHSType : RHSType; bool ScalarOrVector = LHSType->isVLSTBuiltinType() && RHSType->isVLSTBuiltinType(); Diag(Loc, diag::err_typecheck_vector_not_convertable_implict_truncation) << ScalarOrVector << Scalar << Vector; return QualType(); } Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // checkArithmeticNull - Detect when a NULL constant is used improperly in an // expression. These are mainly cases where the null pointer is used as an // integer instead of a pointer. static void checkArithmeticNull(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompare) { // The canonical way to check for a GNU null is with isNullPointerConstant, // but we use a bit of a hack here for speed; this is a relatively // hot path, and isNullPointerConstant is slow. bool LHSNull = isa(LHS.get()->IgnoreParenImpCasts()); bool RHSNull = isa(RHS.get()->IgnoreParenImpCasts()); QualType NonNullType = LHSNull ? RHS.get()->getType() : LHS.get()->getType(); // Avoid analyzing cases where the result will either be invalid (and // diagnosed as such) or entirely valid and not something to warn about. 
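// e.g. (illustrative): with 'int i;', 'i + NULL' is flagged for using NULL
// in arithmetic and 'i == NULL' for comparing NULL against a non-pointer,
// whereas 'p == NULL' with a pointer 'p' is left alone.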
if ((!LHSNull && !RHSNull) || NonNullType->isBlockPointerType() || NonNullType->isMemberPointerType() || NonNullType->isFunctionType()) return; // Comparison operations would not make sense with a null pointer no matter // what the other expression is. if (!IsCompare) { S.Diag(Loc, diag::warn_null_in_arithmetic_operation) << (LHSNull ? LHS.get()->getSourceRange() : SourceRange()) << (RHSNull ? RHS.get()->getSourceRange() : SourceRange()); return; } // The rest of the operations only make sense with a null pointer // if the other expression is a pointer. if (LHSNull == RHSNull || NonNullType->isAnyPointerType() || NonNullType->canDecayToPointerType()) return; S.Diag(Loc, diag::warn_null_in_comparison_operation) << LHSNull /* LHS is NULL */ << NonNullType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } static void DiagnoseDivisionSizeofPointerOrArray(Sema &S, Expr *LHS, Expr *RHS, SourceLocation Loc) { const auto *LUE = dyn_cast(LHS); const auto *RUE = dyn_cast(RHS); if (!LUE || !RUE) return; if (LUE->getKind() != UETT_SizeOf || LUE->isArgumentType() || RUE->getKind() != UETT_SizeOf) return; const Expr *LHSArg = LUE->getArgumentExpr()->IgnoreParens(); QualType LHSTy = LHSArg->getType(); QualType RHSTy; if (RUE->isArgumentType()) RHSTy = RUE->getArgumentType().getNonReferenceType(); else RHSTy = RUE->getArgumentExpr()->IgnoreParens()->getType(); if (LHSTy->isPointerType() && !RHSTy->isPointerType()) { if (!S.Context.hasSameUnqualifiedType(LHSTy->getPointeeType(), RHSTy)) return; S.Diag(Loc, diag::warn_division_sizeof_ptr) << LHS << LHS->getSourceRange(); if (const auto *DRE = dyn_cast(LHSArg)) { if (const ValueDecl *LHSArgDecl = DRE->getDecl()) S.Diag(LHSArgDecl->getLocation(), diag::note_pointer_declared_here) << LHSArgDecl; } } else if (const auto *ArrayTy = S.Context.getAsArrayType(LHSTy)) { QualType ArrayElemTy = ArrayTy->getElementType(); if (ArrayElemTy != S.Context.getBaseElementType(ArrayTy) || ArrayElemTy->isDependentType() || RHSTy->isDependentType() || RHSTy->isReferenceType() || ArrayElemTy->isCharType() || S.Context.getTypeSize(ArrayElemTy) == S.Context.getTypeSize(RHSTy)) return; S.Diag(Loc, diag::warn_division_sizeof_array) << LHSArg->getSourceRange() << ArrayElemTy << RHSTy; if (const auto *DRE = dyn_cast(LHSArg)) { if (const ValueDecl *LHSArgDecl = DRE->getDecl()) S.Diag(LHSArgDecl->getLocation(), diag::note_array_declared_here) << LHSArgDecl; } S.Diag(Loc, diag::note_precedence_silence) << RHS; } } static void DiagnoseBadDivideOrRemainderValues(Sema& S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsDiv) { // Check for division/remainder by zero. 
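// e.g. (illustrative): 'n / 0' and 'n % 0' reach this point with a
// right-hand side that folds to zero and are diagnosed as undefined.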
Expr::EvalResult RHSValue; if (!RHS.get()->isValueDependent() && RHS.get()->EvaluateAsInt(RHSValue, S.Context) && RHSValue.Val.getInt() == 0) S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_remainder_division_by_zero) << IsDiv << RHS.get()->getSourceRange()); } QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign, bool IsDiv) { checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (LHSTy->isVectorType() || RHSTy->isVectorType()) return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/ getLangOpts().AltiVec, /*AllowBoolConversions*/ false, /*AllowBooleanOperation*/ false, /*ReportInvalid*/ true); if (LHSTy->isVLSTBuiltinType() || RHSTy->isVLSTBuiltinType()) return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign, ACK_Arithmetic); if (!IsDiv && (LHSTy->isConstantMatrixType() || RHSTy->isConstantMatrixType())) return CheckMatrixMultiplyOperands(LHS, RHS, Loc, IsCompAssign); // For division, only matrix-by-scalar is supported. Other combinations with // matrix types are invalid. if (IsDiv && LHSTy->isConstantMatrixType() && RHSTy->isArithmeticType()) return CheckMatrixElementwiseOperands(LHS, RHS, Loc, IsCompAssign); QualType compType = UsualArithmeticConversions( LHS, RHS, Loc, IsCompAssign ? ACK_CompAssign : ACK_Arithmetic); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (compType.isNull() || !compType->isArithmeticType()) return InvalidOperands(Loc, LHS, RHS); if (IsDiv) { DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, IsDiv); DiagnoseDivisionSizeofPointerOrArray(*this, LHS.get(), RHS.get(), Loc); } return compType; } QualType Sema::CheckRemainderOperands( ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LHS.get()->getType()->hasIntegerRepresentation() && RHS.get()->getType()->hasIntegerRepresentation()) return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/ getLangOpts().AltiVec, /*AllowBoolConversions*/ false, /*AllowBooleanOperation*/ false, /*ReportInvalid*/ true); return InvalidOperands(Loc, LHS, RHS); } if (LHS.get()->getType()->isVLSTBuiltinType() || RHS.get()->getType()->isVLSTBuiltinType()) { if (LHS.get()->getType()->hasIntegerRepresentation() && RHS.get()->getType()->hasIntegerRepresentation()) return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign, ACK_Arithmetic); return InvalidOperands(Loc, LHS, RHS); } QualType compType = UsualArithmeticConversions( LHS, RHS, Loc, IsCompAssign ? ACK_CompAssign : ACK_Arithmetic); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (compType.isNull() || !compType->isIntegerType()) return InvalidOperands(Loc, LHS, RHS); DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, false /* IsDiv */); return compType; } /// Diagnose invalid arithmetic on two void pointers. static void diagnoseArithmeticOnTwoVoidPointers(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_void_type : diag::ext_gnu_void_ptr) << 1 /* two pointers */ << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } /// Diagnose invalid arithmetic on a void pointer. 
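///
/// For example (illustrative), given 'void *p':
/// \code
///   p + 1;  // GNU extension in C (ext_gnu_void_ptr), an error in C++
/// \endcode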
static void diagnoseArithmeticOnVoidPointer(Sema &S, SourceLocation Loc, Expr *Pointer) { S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_void_type : diag::ext_gnu_void_ptr) << 0 /* one pointer */ << Pointer->getSourceRange(); } /// Diagnose invalid arithmetic on a null pointer. /// /// If \p IsGNUIdiom is true, the operation is using the 'p = (i8*)nullptr + n' /// idiom, which we recognize as a GNU extension. /// static void diagnoseArithmeticOnNullPointer(Sema &S, SourceLocation Loc, Expr *Pointer, bool IsGNUIdiom) { if (IsGNUIdiom) S.Diag(Loc, diag::warn_gnu_null_ptr_arith) << Pointer->getSourceRange(); else S.Diag(Loc, diag::warn_pointer_arith_null_ptr) << S.getLangOpts().CPlusPlus << Pointer->getSourceRange(); } /// Diagnose invalid subtraction on a null pointer. /// static void diagnoseSubtractionOnNullPointer(Sema &S, SourceLocation Loc, Expr *Pointer, bool BothNull) { // Null - null is valid in C++ [expr.add]p7 if (BothNull && S.getLangOpts().CPlusPlus) return; // Is this a macro from a system header? if (S.Diags.getSuppressSystemWarnings() && S.SourceMgr.isInSystemMacro(Loc)) return; S.DiagRuntimeBehavior(Loc, Pointer, S.PDiag(diag::warn_pointer_sub_null_ptr) << S.getLangOpts().CPlusPlus << Pointer->getSourceRange()); } /// Diagnose invalid arithmetic on two function pointers. static void diagnoseArithmeticOnTwoFunctionPointers(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS) { assert(LHS->getType()->isAnyPointerType()); assert(RHS->getType()->isAnyPointerType()); S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_function_type : diag::ext_gnu_ptr_func_arith) << 1 /* two pointers */ << LHS->getType()->getPointeeType() // We only show the second type if it differs from the first. << (unsigned)!S.Context.hasSameUnqualifiedType(LHS->getType(), RHS->getType()) << RHS->getType()->getPointeeType() << LHS->getSourceRange() << RHS->getSourceRange(); } /// Diagnose invalid arithmetic on a function pointer. static void diagnoseArithmeticOnFunctionPointer(Sema &S, SourceLocation Loc, Expr *Pointer) { assert(Pointer->getType()->isAnyPointerType()); S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_function_type : diag::ext_gnu_ptr_func_arith) << 0 /* one pointer */ << Pointer->getType()->getPointeeType() << 0 /* one pointer, so only one type */ << Pointer->getSourceRange(); } /// Emit error if Operand is incomplete pointer type /// /// \returns True if pointer has incomplete type static bool checkArithmeticIncompletePointerType(Sema &S, SourceLocation Loc, Expr *Operand) { QualType ResType = Operand->getType(); if (const AtomicType *ResAtomicType = ResType->getAs<AtomicType>()) ResType = ResAtomicType->getValueType(); assert(ResType->isAnyPointerType() && !ResType->isDependentType()); QualType PointeeTy = ResType->getPointeeType(); return S.RequireCompleteSizedType( Loc, PointeeTy, diag::err_typecheck_arithmetic_incomplete_or_sizeless_type, Operand->getSourceRange()); } /// Check the validity of an arithmetic pointer operand. /// /// If the operand has pointer type, this code will check for pointer types /// which are invalid in arithmetic operations. These will be diagnosed /// appropriately, including whether or not the use is supported as an /// extension. /// /// \returns True when the operand is valid to use (even if as an extension). 
static bool checkArithmeticOpPointerOperand(Sema &S, SourceLocation Loc, Expr *Operand) { QualType ResType = Operand->getType(); if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); if (!ResType->isAnyPointerType()) return true; QualType PointeeTy = ResType->getPointeeType(); if (PointeeTy->isVoidType()) { diagnoseArithmeticOnVoidPointer(S, Loc, Operand); return !S.getLangOpts().CPlusPlus; } if (PointeeTy->isFunctionType()) { diagnoseArithmeticOnFunctionPointer(S, Loc, Operand); return !S.getLangOpts().CPlusPlus; } if (checkArithmeticIncompletePointerType(S, Loc, Operand)) return false; return true; } /// Check the validity of a binary arithmetic operation w.r.t. pointer /// operands. /// /// This routine will diagnose any invalid arithmetic on pointer operands much /// like \see checkArithmeticOpPointerOperand. However, it has special logic /// for emitting a single diagnostic even for operations where both LHS and RHS /// are (potentially problematic) pointers. /// /// \returns True when the operand is valid to use (even if as an extension). static bool checkArithmeticBinOpPointerOperands(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { bool isLHSPointer = LHSExpr->getType()->isAnyPointerType(); bool isRHSPointer = RHSExpr->getType()->isAnyPointerType(); if (!isLHSPointer && !isRHSPointer) return true; QualType LHSPointeeTy, RHSPointeeTy; if (isLHSPointer) LHSPointeeTy = LHSExpr->getType()->getPointeeType(); if (isRHSPointer) RHSPointeeTy = RHSExpr->getType()->getPointeeType(); // if both are pointers check if operation is valid wrt address spaces if (isLHSPointer && isRHSPointer) { if (!LHSPointeeTy.isAddressSpaceOverlapping(RHSPointeeTy)) { S.Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSExpr->getType() << RHSExpr->getType() << 1 /*arithmetic op*/ << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); return false; } } // Check for arithmetic on pointers to incomplete types. bool isLHSVoidPtr = isLHSPointer && LHSPointeeTy->isVoidType(); bool isRHSVoidPtr = isRHSPointer && RHSPointeeTy->isVoidType(); if (isLHSVoidPtr || isRHSVoidPtr) { if (!isRHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, LHSExpr); else if (!isLHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, RHSExpr); else diagnoseArithmeticOnTwoVoidPointers(S, Loc, LHSExpr, RHSExpr); return !S.getLangOpts().CPlusPlus; } bool isLHSFuncPtr = isLHSPointer && LHSPointeeTy->isFunctionType(); bool isRHSFuncPtr = isRHSPointer && RHSPointeeTy->isFunctionType(); if (isLHSFuncPtr || isRHSFuncPtr) { if (!isRHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, LHSExpr); else if (!isLHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, RHSExpr); else diagnoseArithmeticOnTwoFunctionPointers(S, Loc, LHSExpr, RHSExpr); return !S.getLangOpts().CPlusPlus; } if (isLHSPointer && checkArithmeticIncompletePointerType(S, Loc, LHSExpr)) return false; if (isRHSPointer && checkArithmeticIncompletePointerType(S, Loc, RHSExpr)) return false; return true; } /// diagnoseStringPlusInt - Emit a warning when adding an integer to a string /// literal. 
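///
/// For example (illustrative):
/// \code
///   const char *p = "foo" + 2;  // warns; the fix-it suggests &"foo"[2]
/// \endcode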
static void diagnoseStringPlusInt(Sema &Self, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { StringLiteral* StrExpr = dyn_cast(LHSExpr->IgnoreImpCasts()); Expr* IndexExpr = RHSExpr; if (!StrExpr) { StrExpr = dyn_cast(RHSExpr->IgnoreImpCasts()); IndexExpr = LHSExpr; } bool IsStringPlusInt = StrExpr && IndexExpr->getType()->isIntegralOrUnscopedEnumerationType(); if (!IsStringPlusInt || IndexExpr->isValueDependent()) return; SourceRange DiagRange(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc()); Self.Diag(OpLoc, diag::warn_string_plus_int) << DiagRange << IndexExpr->IgnoreImpCasts()->getType(); // Only print a fixit for "str" + int, not for int + "str". if (IndexExpr == RHSExpr) { SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getEndLoc()); Self.Diag(OpLoc, diag::note_string_plus_scalar_silence) << FixItHint::CreateInsertion(LHSExpr->getBeginLoc(), "&") << FixItHint::CreateReplacement(SourceRange(OpLoc), "[") << FixItHint::CreateInsertion(EndLoc, "]"); } else Self.Diag(OpLoc, diag::note_string_plus_scalar_silence); } /// Emit a warning when adding a char literal to a string. static void diagnoseStringPlusChar(Sema &Self, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { const Expr *StringRefExpr = LHSExpr; const CharacterLiteral *CharExpr = dyn_cast(RHSExpr->IgnoreImpCasts()); if (!CharExpr) { CharExpr = dyn_cast(LHSExpr->IgnoreImpCasts()); StringRefExpr = RHSExpr; } if (!CharExpr || !StringRefExpr) return; const QualType StringType = StringRefExpr->getType(); // Return if not a PointerType. if (!StringType->isAnyPointerType()) return; // Return if not a CharacterType. if (!StringType->getPointeeType()->isAnyCharacterType()) return; ASTContext &Ctx = Self.getASTContext(); SourceRange DiagRange(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc()); const QualType CharType = CharExpr->getType(); if (!CharType->isAnyCharacterType() && CharType->isIntegerType() && llvm::isUIntN(Ctx.getCharWidth(), CharExpr->getValue())) { Self.Diag(OpLoc, diag::warn_string_plus_char) << DiagRange << Ctx.CharTy; } else { Self.Diag(OpLoc, diag::warn_string_plus_char) << DiagRange << CharExpr->getType(); } // Only print a fixit for str + char, not for char + str. if (isa(RHSExpr->IgnoreImpCasts())) { SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getEndLoc()); Self.Diag(OpLoc, diag::note_string_plus_scalar_silence) << FixItHint::CreateInsertion(LHSExpr->getBeginLoc(), "&") << FixItHint::CreateReplacement(SourceRange(OpLoc), "[") << FixItHint::CreateInsertion(EndLoc, "]"); } else { Self.Diag(OpLoc, diag::note_string_plus_scalar_silence); } } /// Emit error when two pointers are incompatible. 
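///
/// For example (illustrative), subtracting pointers to unrelated types such as
/// 'int *' and 'float *' is diagnosed with err_typecheck_sub_ptr_compatible.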
static void diagnosePointerIncompatibility(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { assert(LHSExpr->getType()->isAnyPointerType()); assert(RHSExpr->getType()->isAnyPointerType()); S.Diag(Loc, diag::err_typecheck_sub_ptr_compatible) << LHSExpr->getType() << RHSExpr->getType() << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } // C99 6.5.6 QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType* CompLHSTy) { checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType compType = CheckVectorOperands(LHS, RHS, Loc, CompLHSTy, /*AllowBothBool*/ getLangOpts().AltiVec, /*AllowBoolConversions*/ getLangOpts().ZVector, /*AllowBooleanOperation*/ false, /*ReportInvalid*/ true); if (CompLHSTy) *CompLHSTy = compType; return compType; } if (LHS.get()->getType()->isVLSTBuiltinType() || RHS.get()->getType()->isVLSTBuiltinType()) { QualType compType = CheckSizelessVectorOperands(LHS, RHS, Loc, CompLHSTy, ACK_Arithmetic); if (CompLHSTy) *CompLHSTy = compType; return compType; } if (LHS.get()->getType()->isConstantMatrixType() || RHS.get()->getType()->isConstantMatrixType()) { QualType compType = CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy); if (CompLHSTy) *CompLHSTy = compType; return compType; } QualType compType = UsualArithmeticConversions( LHS, RHS, Loc, CompLHSTy ? ACK_CompAssign : ACK_Arithmetic); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // Diagnose "string literal" '+' int and string '+' "char literal". if (Opc == BO_Add) { diagnoseStringPlusInt(*this, Loc, LHS.get(), RHS.get()); diagnoseStringPlusChar(*this, Loc, LHS.get(), RHS.get()); } // handle the common case first (both operands are arithmetic). if (!compType.isNull() && compType->isArithmeticType()) { if (CompLHSTy) *CompLHSTy = compType; return compType; } // Type-checking. Ultimately the pointer's going to be in PExp; // note that we bias towards the LHS being the pointer. Expr *PExp = LHS.get(), *IExp = RHS.get(); bool isObjCPointer; if (PExp->getType()->isPointerType()) { isObjCPointer = false; } else if (PExp->getType()->isObjCObjectPointerType()) { isObjCPointer = true; } else { std::swap(PExp, IExp); if (PExp->getType()->isPointerType()) { isObjCPointer = false; } else if (PExp->getType()->isObjCObjectPointerType()) { isObjCPointer = true; } else { return InvalidOperands(Loc, LHS, RHS); } } assert(PExp->getType()->isAnyPointerType()); if (!IExp->getType()->isIntegerType()) return InvalidOperands(Loc, LHS, RHS); // Adding to a null pointer results in undefined behavior. if (PExp->IgnoreParenCasts()->isNullPointerConstant( Context, Expr::NPC_ValueDependentIsNotNull)) { // In C++ adding zero to a null pointer is defined. Expr::EvalResult KnownVal; if (!getLangOpts().CPlusPlus || (!IExp->isValueDependent() && (!IExp->EvaluateAsInt(KnownVal, Context) || KnownVal.Val.getInt() != 0))) { // Check the conditions to see if this is the 'p = nullptr + n' idiom. 
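// (Illustrative: something like 'p = (char *)0 + n;', which some code bases use
// to fabricate a pointer from an integer; it gets the milder
// warn_gnu_null_ptr_arith rather than the general null-arithmetic warning.)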
bool IsGNUIdiom = BinaryOperator::isNullPointerArithmeticExtension( Context, BO_Add, PExp, IExp); diagnoseArithmeticOnNullPointer(*this, Loc, PExp, IsGNUIdiom); } } if (!checkArithmeticOpPointerOperand(*this, Loc, PExp)) return QualType(); if (isObjCPointer && checkArithmeticOnObjCPointer(*this, Loc, PExp)) return QualType(); // Check array bounds for pointer arithemtic CheckArrayAccess(PExp, IExp); if (CompLHSTy) { QualType LHSTy = Context.isPromotableBitField(LHS.get()); if (LHSTy.isNull()) { LHSTy = LHS.get()->getType(); if (LHSTy->isPromotableIntegerType()) LHSTy = Context.getPromotedIntegerType(LHSTy); } *CompLHSTy = LHSTy; } return PExp->getType(); } // C99 6.5.6 QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, QualType* CompLHSTy) { checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType compType = CheckVectorOperands(LHS, RHS, Loc, CompLHSTy, /*AllowBothBool*/ getLangOpts().AltiVec, /*AllowBoolConversions*/ getLangOpts().ZVector, /*AllowBooleanOperation*/ false, /*ReportInvalid*/ true); if (CompLHSTy) *CompLHSTy = compType; return compType; } if (LHS.get()->getType()->isVLSTBuiltinType() || RHS.get()->getType()->isVLSTBuiltinType()) { QualType compType = CheckSizelessVectorOperands(LHS, RHS, Loc, CompLHSTy, ACK_Arithmetic); if (CompLHSTy) *CompLHSTy = compType; return compType; } if (LHS.get()->getType()->isConstantMatrixType() || RHS.get()->getType()->isConstantMatrixType()) { QualType compType = CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy); if (CompLHSTy) *CompLHSTy = compType; return compType; } QualType compType = UsualArithmeticConversions( LHS, RHS, Loc, CompLHSTy ? ACK_CompAssign : ACK_Arithmetic); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // Enforce type constraints: C99 6.5.6p3. // Handle the common case first (both operands are arithmetic). if (!compType.isNull() && compType->isArithmeticType()) { if (CompLHSTy) *CompLHSTy = compType; return compType; } // Either ptr - int or ptr - ptr. if (LHS.get()->getType()->isAnyPointerType()) { QualType lpointee = LHS.get()->getType()->getPointeeType(); // Diagnose bad cases where we step over interface counts. if (LHS.get()->getType()->isObjCObjectPointerType() && checkArithmeticOnObjCPointer(*this, Loc, LHS.get())) return QualType(); // The result type of a pointer-int computation is the pointer type. if (RHS.get()->getType()->isIntegerType()) { // Subtracting from a null pointer should produce a warning. // The last argument to the diagnose call says this doesn't match the // GNU int-to-pointer idiom. if (LHS.get()->IgnoreParenCasts()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull)) { // In C++ adding zero to a null pointer is defined. Expr::EvalResult KnownVal; if (!getLangOpts().CPlusPlus || (!RHS.get()->isValueDependent() && (!RHS.get()->EvaluateAsInt(KnownVal, Context) || KnownVal.Val.getInt() != 0))) { diagnoseArithmeticOnNullPointer(*this, Loc, LHS.get(), false); } } if (!checkArithmeticOpPointerOperand(*this, Loc, LHS.get())) return QualType(); // Check array bounds for pointer arithemtic CheckArrayAccess(LHS.get(), RHS.get(), /*ArraySubscriptExpr*/nullptr, /*AllowOnePastEnd*/true, /*IndexNegated*/true); if (CompLHSTy) *CompLHSTy = LHS.get()->getType(); return LHS.get()->getType(); } // Handle pointer-pointer subtractions. 
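// For example (illustrative), '&arr[4] - &arr[1]' yields a ptrdiff_t value of 3;
// the checks below require the pointee types to be compatible and the pointee
// size to be known and non-zero.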
if (const PointerType *RHSPTy = RHS.get()->getType()->getAs()) { QualType rpointee = RHSPTy->getPointeeType(); if (getLangOpts().CPlusPlus) { // Pointee types must be the same: C++ [expr.add] if (!Context.hasSameUnqualifiedType(lpointee, rpointee)) { diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get()); } } else { // Pointee types must be compatible C99 6.5.6p3 if (!Context.typesAreCompatible( Context.getCanonicalType(lpointee).getUnqualifiedType(), Context.getCanonicalType(rpointee).getUnqualifiedType())) { diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get()); return QualType(); } } if (!checkArithmeticBinOpPointerOperands(*this, Loc, LHS.get(), RHS.get())) return QualType(); bool LHSIsNullPtr = LHS.get()->IgnoreParenCasts()->isNullPointerConstant( Context, Expr::NPC_ValueDependentIsNotNull); bool RHSIsNullPtr = RHS.get()->IgnoreParenCasts()->isNullPointerConstant( Context, Expr::NPC_ValueDependentIsNotNull); // Subtracting nullptr or from nullptr is suspect if (LHSIsNullPtr) diagnoseSubtractionOnNullPointer(*this, Loc, LHS.get(), RHSIsNullPtr); if (RHSIsNullPtr) diagnoseSubtractionOnNullPointer(*this, Loc, RHS.get(), LHSIsNullPtr); // The pointee type may have zero size. As an extension, a structure or // union may have zero size or an array may have zero length. In this // case subtraction does not make sense. if (!rpointee->isVoidType() && !rpointee->isFunctionType()) { CharUnits ElementSize = Context.getTypeSizeInChars(rpointee); if (ElementSize.isZero()) { Diag(Loc,diag::warn_sub_ptr_zero_size_types) << rpointee.getUnqualifiedType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } if (CompLHSTy) *CompLHSTy = LHS.get()->getType(); return Context.getPointerDiffType(); } } return InvalidOperands(Loc, LHS, RHS); } static bool isScopedEnumerationType(QualType T) { if (const EnumType *ET = T->getAs()) return ET->getDecl()->isScoped(); return false; } static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType LHSType) { // OpenCL 6.3j: shift values are effectively % word size of LHS (more defined), // so skip remaining warnings as we don't want to modify values within Sema. if (S.getLangOpts().OpenCL) return; // Check right/shifter operand Expr::EvalResult RHSResult; if (RHS.get()->isValueDependent() || !RHS.get()->EvaluateAsInt(RHSResult, S.Context)) return; llvm::APSInt Right = RHSResult.Val.getInt(); if (Right.isNegative()) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_negative) << RHS.get()->getSourceRange()); return; } QualType LHSExprType = LHS.get()->getType(); uint64_t LeftSize = S.Context.getTypeSize(LHSExprType); if (LHSExprType->isBitIntType()) LeftSize = S.Context.getIntWidth(LHSExprType); else if (LHSExprType->isFixedPointType()) { auto FXSema = S.Context.getFixedPointSemantics(LHSExprType); LeftSize = FXSema.getWidth() - (unsigned)FXSema.hasUnsignedPadding(); } llvm::APInt LeftBits(Right.getBitWidth(), LeftSize); if (Right.uge(LeftBits)) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_gt_typewidth) << RHS.get()->getSourceRange()); return; } // FIXME: We probably need to handle fixed point types specially here. if (Opc != BO_Shl || LHSExprType->isFixedPointType()) return; // When left shifting an ICE which is signed, we can check for overflow which // according to C++ standards prior to C++2a has undefined behavior // ([expr.shift] 5.8/2). 
// Unsigned integers have defined behavior modulo one // more than the maximum value representable in the result type, so never // warn for those. (FIXME: Unsigned left-shift overflow in a constant // expression is still probably a bug.) Expr::EvalResult LHSResult; if (LHS.get()->isValueDependent() || LHSType->hasUnsignedIntegerRepresentation() || !LHS.get()->EvaluateAsInt(LHSResult, S.Context)) return; llvm::APSInt Left = LHSResult.Val.getInt(); // Don't warn if signed overflow is defined; then none of the remaining // diagnostics are triggered because the behavior is defined. // Also don't warn in C++20 mode (and newer), as signed left shifts // always wrap and never overflow. if (S.getLangOpts().isSignedOverflowDefined() || S.getLangOpts().CPlusPlus20) return; // If LHS does not have a non-negative value, the // behavior is undefined before C++2a. Warn about it. if (Left.isNegative()) { S.DiagRuntimeBehavior(Loc, LHS.get(), S.PDiag(diag::warn_shift_lhs_negative) << LHS.get()->getSourceRange()); return; } llvm::APInt ResultBits = static_cast<llvm::APInt &>(Right) + Left.getMinSignedBits(); if (LeftBits.uge(ResultBits)) return; llvm::APSInt Result = Left.extend(ResultBits.getLimitedValue()); Result = Result.shl(Right); // Print the bit representation of the signed integer as an unsigned // hexadecimal number. SmallString<40> HexResult; Result.toString(HexResult, 16, /*Signed =*/false, /*Literal =*/true); // If we are only missing a sign bit, this is less likely to result in actual // bugs -- if the result is cast back to an unsigned type, it will have the // expected value. Thus we place this behind a different warning that can be // turned off separately if needed. if (LeftBits == ResultBits - 1) { S.Diag(Loc, diag::warn_shift_result_sets_sign_bit) << HexResult << LHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return; } S.Diag(Loc, diag::warn_shift_result_gt_typewidth) << HexResult.str() << Result.getMinSignedBits() << LHSType << Left.getBitWidth() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } /// Return the resulting type when a vector is shifted /// by a scalar or vector shift amount. static QualType checkVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { // OpenCL v1.1 s6.3.j says RHS can be a vector only if LHS is a vector. if ((S.LangOpts.OpenCL || S.LangOpts.ZVector) && !LHS.get()->getType()->isVectorType()) { S.Diag(Loc, diag::err_shift_rhs_only_vector) << RHS.get()->getType() << LHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (!IsCompAssign) { LHS = S.UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = S.UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); // Note that LHS might be a scalar because this routine is called not only for // the OpenCL case. const VectorType *LHSVecTy = LHSType->getAs<VectorType>(); QualType LHSEleType = LHSVecTy ? LHSVecTy->getElementType() : LHSType; // Note that RHS might not be a vector. QualType RHSType = RHS.get()->getType(); const VectorType *RHSVecTy = RHSType->getAs<VectorType>(); QualType RHSEleType = RHSVecTy ? RHSVecTy->getElementType() : RHSType; // Do not allow shifts for boolean vectors. 
if ((LHSVecTy && LHSVecTy->isExtVectorBoolType()) || (RHSVecTy && RHSVecTy->isExtVectorBoolType())) { S.Diag(Loc, diag::err_typecheck_invalid_operands) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange(); return QualType(); } // The operands need to be integers. if (!LHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << LHS.get()->getType() << LHS.get()->getSourceRange(); return QualType(); } if (!RHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << RHS.get()->getType() << RHS.get()->getSourceRange(); return QualType(); } if (!LHSVecTy) { assert(RHSVecTy); if (IsCompAssign) return RHSType; if (LHSEleType != RHSEleType) { LHS = S.ImpCastExprToType(LHS.get(),RHSEleType, CK_IntegralCast); LHSEleType = RHSEleType; } QualType VecTy = S.Context.getExtVectorType(LHSEleType, RHSVecTy->getNumElements()); LHS = S.ImpCastExprToType(LHS.get(), VecTy, CK_VectorSplat); LHSType = VecTy; } else if (RHSVecTy) { // OpenCL v1.1 s6.3.j says that for vector types, the operators // are applied component-wise. So if RHS is a vector, then ensure // that the number of elements is the same as LHS... if (RHSVecTy->getNumElements() != LHSVecTy->getNumElements()) { S.Diag(Loc, diag::err_typecheck_vector_lengths_not_equal) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (!S.LangOpts.OpenCL && !S.LangOpts.ZVector) { const BuiltinType *LHSBT = LHSEleType->getAs(); const BuiltinType *RHSBT = RHSEleType->getAs(); if (LHSBT != RHSBT && S.Context.getTypeSize(LHSBT) != S.Context.getTypeSize(RHSBT)) { S.Diag(Loc, diag::warn_typecheck_vector_element_sizes_not_equal) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } } else { // ...else expand RHS to match the number of elements in LHS. QualType VecTy = S.Context.getExtVectorType(RHSEleType, LHSVecTy->getNumElements()); RHS = S.ImpCastExprToType(RHS.get(), VecTy, CK_VectorSplat); } return LHSType; } static QualType checkSizelessVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { if (!IsCompAssign) { LHS = S.UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = S.UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); const BuiltinType *LHSBuiltinTy = LHSType->getAs(); QualType LHSEleType = LHSType->isVLSTBuiltinType() ? LHSBuiltinTy->getSveEltType(S.getASTContext()) : LHSType; // Note that RHS might not be a vector QualType RHSType = RHS.get()->getType(); const BuiltinType *RHSBuiltinTy = RHSType->getAs(); QualType RHSEleType = RHSType->isVLSTBuiltinType() ? 
RHSBuiltinTy->getSveEltType(S.getASTContext()) : RHSType; if ((LHSBuiltinTy && LHSBuiltinTy->isSVEBool()) || (RHSBuiltinTy && RHSBuiltinTy->isSVEBool())) { S.Diag(Loc, diag::err_typecheck_invalid_operands) << LHSType << RHSType << LHS.get()->getSourceRange(); return QualType(); } if (!LHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << LHS.get()->getType() << LHS.get()->getSourceRange(); return QualType(); } if (!RHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << RHS.get()->getType() << RHS.get()->getSourceRange(); return QualType(); } if (LHSType->isVLSTBuiltinType() && RHSType->isVLSTBuiltinType() && (S.Context.getBuiltinVectorTypeInfo(LHSBuiltinTy).EC != S.Context.getBuiltinVectorTypeInfo(RHSBuiltinTy).EC)) { S.Diag(Loc, diag::err_typecheck_invalid_operands) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (!LHSType->isVLSTBuiltinType()) { assert(RHSType->isVLSTBuiltinType()); if (IsCompAssign) return RHSType; if (LHSEleType != RHSEleType) { LHS = S.ImpCastExprToType(LHS.get(), RHSEleType, clang::CK_IntegralCast); LHSEleType = RHSEleType; } const llvm::ElementCount VecSize = S.Context.getBuiltinVectorTypeInfo(RHSBuiltinTy).EC; QualType VecTy = S.Context.getScalableVectorType(LHSEleType, VecSize.getKnownMinValue()); LHS = S.ImpCastExprToType(LHS.get(), VecTy, clang::CK_VectorSplat); LHSType = VecTy; } else if (RHSBuiltinTy && RHSBuiltinTy->isVLSTBuiltinType()) { if (S.Context.getTypeSize(RHSBuiltinTy) != S.Context.getTypeSize(LHSBuiltinTy)) { S.Diag(Loc, diag::err_typecheck_vector_lengths_not_equal) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } } else { const llvm::ElementCount VecSize = S.Context.getBuiltinVectorTypeInfo(LHSBuiltinTy).EC; if (LHSEleType != RHSEleType) { RHS = S.ImpCastExprToType(RHS.get(), LHSEleType, clang::CK_IntegralCast); RHSEleType = LHSEleType; } QualType VecTy = S.Context.getScalableVectorType(RHSEleType, VecSize.getKnownMinValue()); RHS = S.ImpCastExprToType(RHS.get(), VecTy, CK_VectorSplat); } return LHSType; } // C99 6.5.7 QualType Sema::CheckShiftOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, bool IsCompAssign) { checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); // Vector shifts promote their scalar inputs to vector type. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LangOpts.ZVector) { // The shift operators for the z vector extensions work basically // like general shifts, except that neither the LHS nor the RHS is // allowed to be a "vector bool". if (auto LHSVecType = LHS.get()->getType()->getAs()) if (LHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); if (auto RHSVecType = RHS.get()->getType()->getAs()) if (RHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); } return checkVectorShift(*this, LHS, RHS, Loc, IsCompAssign); } if (LHS.get()->getType()->isVLSTBuiltinType() || RHS.get()->getType()->isVLSTBuiltinType()) return checkSizelessVectorShift(*this, LHS, RHS, Loc, IsCompAssign); // Shifts don't perform usual arithmetic conversions, they just do integer // promotions on each operand. C99 6.5.7p3 // For the LHS, do usual unary conversions, but then reset them away // if this is a compound assignment. 
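// (The promoted type is only needed for the checks and for the result type;
// for a compound assignment like 'c <<= 1' the LHS expression itself must keep
// its original form, so the conversion is undone again below.)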
ExprResult OldLHS = LHS; LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); if (IsCompAssign) LHS = OldLHS; // The RHS is simpler. RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType RHSType = RHS.get()->getType(); // C99 6.5.7p2: Each of the operands shall have integer type. // Embedded-C 4.1.6.2.2: The LHS may also be fixed-point. if ((!LHSType->isFixedPointOrIntegerType() && !LHSType->hasIntegerRepresentation()) || !RHSType->hasIntegerRepresentation()) return InvalidOperands(Loc, LHS, RHS); // C++0x: Don't allow scoped enums. FIXME: Use something better than // hasIntegerRepresentation() above instead of this. if (isScopedEnumerationType(LHSType) || isScopedEnumerationType(RHSType)) { return InvalidOperands(Loc, LHS, RHS); } DiagnoseBadShiftValues(*this, LHS, RHS, Loc, Opc, LHSType); // "The type of the result is that of the promoted left operand." return LHSType; } /// Diagnose bad pointer comparisons. static void diagnoseDistinctPointerComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, bool IsError) { S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_distinct_pointers : diag::ext_typecheck_comparison_of_distinct_pointers) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } /// Returns false if the pointers are converted to a composite type, /// true otherwise. static bool convertPointersToCompositeType(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { // C++ [expr.rel]p2: // [...] Pointer conversions (4.10) and qualification // conversions (4.4) are performed on pointer operands (or on // a pointer operand and a null pointer constant) to bring // them to their composite pointer type. [...] // // C++ [expr.eq]p1 uses the same notion for (in)equality // comparisons of pointers. QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); assert(LHSType->isPointerType() || RHSType->isPointerType() || LHSType->isMemberPointerType() || RHSType->isMemberPointerType()); QualType T = S.FindCompositePointerType(Loc, LHS, RHS); if (T.isNull()) { if ((LHSType->isAnyPointerType() || LHSType->isMemberPointerType()) && (RHSType->isAnyPointerType() || RHSType->isMemberPointerType())) diagnoseDistinctPointerComparison(S, Loc, LHS, RHS, /*isError*/true); else S.InvalidOperands(Loc, LHS, RHS); return true; } return false; } static void diagnoseFunctionPointerToVoidComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, bool IsError) { S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_fptr_to_void : diag::ext_typecheck_comparison_of_fptr_to_void) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } static bool isObjCObjectLiteral(ExprResult &E) { switch (E.get()->IgnoreParenImpCasts()->getStmtClass()) { case Stmt::ObjCArrayLiteralClass: case Stmt::ObjCDictionaryLiteralClass: case Stmt::ObjCStringLiteralClass: case Stmt::ObjCBoxedExprClass: return true; default: // Note that ObjCBoolLiteral is NOT an object literal! return false; } } static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) { const ObjCObjectPointerType *Type = LHS->getType()->getAs(); // If this is not actually an Objective-C object, bail out. if (!Type) return false; // Get the LHS object's interface type. QualType InterfaceType = Type->getPointeeType(); // If the RHS isn't an Objective-C object, bail out. 
if (!RHS->getType()->isObjCObjectPointerType()) return false; // Try to find the -isEqual: method. Selector IsEqualSel = S.NSAPIObj->getIsEqualSelector(); ObjCMethodDecl *Method = S.LookupMethodInObjectType(IsEqualSel, InterfaceType, /*IsInstance=*/true); if (!Method) { if (Type->isObjCIdType()) { // For 'id', just check the global pool. Method = S.LookupInstanceMethodInGlobalPool(IsEqualSel, SourceRange(), /*receiverId=*/true); } else { // Check protocols. Method = S.LookupMethodInQualifiedType(IsEqualSel, Type, /*IsInstance=*/true); } } if (!Method) return false; QualType T = Method->parameters()[0]->getType(); if (!T->isObjCObjectPointerType()) return false; QualType R = Method->getReturnType(); if (!R->isScalarType()) return false; return true; } Sema::ObjCLiteralKind Sema::CheckLiteralKind(Expr *FromE) { FromE = FromE->IgnoreParenImpCasts(); switch (FromE->getStmtClass()) { default: break; case Stmt::ObjCStringLiteralClass: // "string literal" return LK_String; case Stmt::ObjCArrayLiteralClass: // "array literal" return LK_Array; case Stmt::ObjCDictionaryLiteralClass: // "dictionary literal" return LK_Dictionary; case Stmt::BlockExprClass: return LK_Block; case Stmt::ObjCBoxedExprClass: { Expr *Inner = cast(FromE)->getSubExpr()->IgnoreParens(); switch (Inner->getStmtClass()) { case Stmt::IntegerLiteralClass: case Stmt::FloatingLiteralClass: case Stmt::CharacterLiteralClass: case Stmt::ObjCBoolLiteralExprClass: case Stmt::CXXBoolLiteralExprClass: // "numeric literal" return LK_Numeric; case Stmt::ImplicitCastExprClass: { CastKind CK = cast(Inner)->getCastKind(); // Boolean literals can be represented by implicit casts. if (CK == CK_IntegralToBoolean || CK == CK_IntegralCast) return LK_Numeric; break; } default: break; } return LK_Boxed; } } return LK_None; } static void diagnoseObjCLiteralComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, BinaryOperator::Opcode Opc){ Expr *Literal; Expr *Other; if (isObjCObjectLiteral(LHS)) { Literal = LHS.get(); Other = RHS.get(); } else { Literal = RHS.get(); Other = LHS.get(); } // Don't warn on comparisons against nil. Other = Other->IgnoreParenCasts(); if (Other->isNullPointerConstant(S.getASTContext(), Expr::NPC_ValueDependentIsNotNull)) return; // This should be kept in sync with warn_objc_literal_comparison. // LK_String should always be after the other literals, since it has its own // warning flag. Sema::ObjCLiteralKind LiteralKind = S.CheckLiteralKind(Literal); assert(LiteralKind != Sema::LK_Block); if (LiteralKind == Sema::LK_None) { llvm_unreachable("Unknown Objective-C object literal kind"); } if (LiteralKind == Sema::LK_String) S.Diag(Loc, diag::warn_objc_string_literal_comparison) << Literal->getSourceRange(); else S.Diag(Loc, diag::warn_objc_literal_comparison) << LiteralKind << Literal->getSourceRange(); if (BinaryOperator::isEqualityOp(Opc) && hasIsEqualMethod(S, LHS.get(), RHS.get())) { SourceLocation Start = LHS.get()->getBeginLoc(); SourceLocation End = S.getLocForEndOfToken(RHS.get()->getEndLoc()); CharSourceRange OpRange = CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc)); S.Diag(Loc, diag::note_objc_literal_comparison_isequal) << FixItHint::CreateInsertion(Start, Opc == BO_EQ ? "[" : "![") << FixItHint::CreateReplacement(OpRange, " isEqual:") << FixItHint::CreateInsertion(End, "]"); } } /// Warns on !x < y, !x & y where !(x < y), !(x & y) was probably intended. 
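///
/// For example (illustrative):
/// \code
///   if (!x < y) {}  // warns; notes suggest either !(x < y) or (!x) < y
/// \endcode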
static void diagnoseLogicalNotOnLHSofCheck(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { // Check that left hand side is !something. UnaryOperator *UO = dyn_cast(LHS.get()->IgnoreImpCasts()); if (!UO || UO->getOpcode() != UO_LNot) return; // Only check if the right hand side is non-bool arithmetic type. if (RHS.get()->isKnownToHaveBooleanValue()) return; // Make sure that the something in !something is not bool. Expr *SubExpr = UO->getSubExpr()->IgnoreImpCasts(); if (SubExpr->isKnownToHaveBooleanValue()) return; // Emit warning. bool IsBitwiseOp = Opc == BO_And || Opc == BO_Or || Opc == BO_Xor; S.Diag(UO->getOperatorLoc(), diag::warn_logical_not_on_lhs_of_check) << Loc << IsBitwiseOp; // First note suggest !(x < y) SourceLocation FirstOpen = SubExpr->getBeginLoc(); SourceLocation FirstClose = RHS.get()->getEndLoc(); FirstClose = S.getLocForEndOfToken(FirstClose); if (FirstClose.isInvalid()) FirstOpen = SourceLocation(); S.Diag(UO->getOperatorLoc(), diag::note_logical_not_fix) << IsBitwiseOp << FixItHint::CreateInsertion(FirstOpen, "(") << FixItHint::CreateInsertion(FirstClose, ")"); // Second note suggests (!x) < y SourceLocation SecondOpen = LHS.get()->getBeginLoc(); SourceLocation SecondClose = LHS.get()->getEndLoc(); SecondClose = S.getLocForEndOfToken(SecondClose); if (SecondClose.isInvalid()) SecondOpen = SourceLocation(); S.Diag(UO->getOperatorLoc(), diag::note_logical_not_silence_with_parens) << FixItHint::CreateInsertion(SecondOpen, "(") << FixItHint::CreateInsertion(SecondClose, ")"); } // Returns true if E refers to a non-weak array. static bool checkForArray(const Expr *E) { const ValueDecl *D = nullptr; if (const DeclRefExpr *DR = dyn_cast(E)) { D = DR->getDecl(); } else if (const MemberExpr *Mem = dyn_cast(E)) { if (Mem->isImplicitAccess()) D = Mem->getMemberDecl(); } if (!D) return false; return D->getType()->isArrayType() && !D->isWeak(); } /// Diagnose some forms of syntactically-obvious tautological comparison. static void diagnoseTautologicalComparison(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS, BinaryOperatorKind Opc) { Expr *LHSStripped = LHS->IgnoreParenImpCasts(); Expr *RHSStripped = RHS->IgnoreParenImpCasts(); QualType LHSType = LHS->getType(); QualType RHSType = RHS->getType(); if (LHSType->hasFloatingRepresentation() || (LHSType->isBlockPointerType() && !BinaryOperator::isEqualityOp(Opc)) || S.inTemplateInstantiation()) return; // Comparisons between two array types are ill-formed for operator<=>, so // we shouldn't emit any additional warnings about it. if (Opc == BO_Cmp && LHSType->isArrayType() && RHSType->isArrayType()) return; // For non-floating point types, check for self-comparisons of the form // x == x, x != x, x < x, etc. These always evaluate to a constant, and // often indicate logic errors in the program. // // NOTE: Don't warn about comparison expressions resulting from macro // expansion. Also don't warn about comparisons which are only self // comparisons within a template instantiation. The warnings should catch // obvious cases in the definition of the template anyways. The idea is to // warn when the typed comparison operator will always evaluate to the same // result. // Used for indexing into %select in warn_comparison_always enum { AlwaysConstant, AlwaysTrue, AlwaysFalse, AlwaysEqual, // std::strong_ordering::equal from operator<=> }; // C++2a [depr.array.comp]: // Equality and relational comparisons ([expr.eq], [expr.rel]) between two // operands of array type are deprecated. 
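// For example (illustrative), with 'int a[4], b[4];' the expression 'a == b'
// compares the decayed pointers rather than the array contents, which is rarely
// what was intended.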
if (S.getLangOpts().CPlusPlus20 && LHSStripped->getType()->isArrayType() && RHSStripped->getType()->isArrayType()) { S.Diag(Loc, diag::warn_depr_array_comparison) << LHS->getSourceRange() << RHS->getSourceRange() << LHSStripped->getType() << RHSStripped->getType(); // Carry on to produce the tautological comparison warning, if this // expression is potentially-evaluated, we can resolve the array to a // non-weak declaration, and so on. } if (!LHS->getBeginLoc().isMacroID() && !RHS->getBeginLoc().isMacroID()) { if (Expr::isSameComparisonOperand(LHS, RHS)) { unsigned Result; switch (Opc) { case BO_EQ: case BO_LE: case BO_GE: Result = AlwaysTrue; break; case BO_NE: case BO_LT: case BO_GT: Result = AlwaysFalse; break; case BO_Cmp: Result = AlwaysEqual; break; default: Result = AlwaysConstant; break; } S.DiagRuntimeBehavior(Loc, nullptr, S.PDiag(diag::warn_comparison_always) << 0 /*self-comparison*/ << Result); } else if (checkForArray(LHSStripped) && checkForArray(RHSStripped)) { // What is it always going to evaluate to? unsigned Result; switch (Opc) { case BO_EQ: // e.g. array1 == array2 Result = AlwaysFalse; break; case BO_NE: // e.g. array1 != array2 Result = AlwaysTrue; break; default: // e.g. array1 <= array2 // The best we can say is 'a constant' Result = AlwaysConstant; break; } S.DiagRuntimeBehavior(Loc, nullptr, S.PDiag(diag::warn_comparison_always) << 1 /*array comparison*/ << Result); } } if (isa(LHSStripped)) LHSStripped = LHSStripped->IgnoreParenCasts(); if (isa(RHSStripped)) RHSStripped = RHSStripped->IgnoreParenCasts(); // Warn about comparisons against a string constant (unless the other // operand is null); the user probably wants string comparison function. Expr *LiteralString = nullptr; Expr *LiteralStringStripped = nullptr; if ((isa(LHSStripped) || isa(LHSStripped)) && !RHSStripped->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) { LiteralString = LHS; LiteralStringStripped = LHSStripped; } else if ((isa(RHSStripped) || isa(RHSStripped)) && !LHSStripped->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) { LiteralString = RHS; LiteralStringStripped = RHSStripped; } if (LiteralString) { S.DiagRuntimeBehavior(Loc, nullptr, S.PDiag(diag::warn_stringcompare) << isa(LiteralStringStripped) << LiteralString->getSourceRange()); } } static ImplicitConversionKind castKindToImplicitConversionKind(CastKind CK) { switch (CK) { default: { #ifndef NDEBUG llvm::errs() << "unhandled cast kind: " << CastExpr::getCastKindName(CK) << "\n"; #endif llvm_unreachable("unhandled cast kind"); } case CK_UserDefinedConversion: return ICK_Identity; case CK_LValueToRValue: return ICK_Lvalue_To_Rvalue; case CK_ArrayToPointerDecay: return ICK_Array_To_Pointer; case CK_FunctionToPointerDecay: return ICK_Function_To_Pointer; case CK_IntegralCast: return ICK_Integral_Conversion; case CK_FloatingCast: return ICK_Floating_Conversion; case CK_IntegralToFloating: case CK_FloatingToIntegral: return ICK_Floating_Integral; case CK_IntegralComplexCast: case CK_FloatingComplexCast: case CK_FloatingComplexToIntegralComplex: case CK_IntegralComplexToFloatingComplex: return ICK_Complex_Conversion; case CK_FloatingComplexToReal: case CK_FloatingRealToComplex: case CK_IntegralComplexToReal: case CK_IntegralRealToComplex: return ICK_Complex_Real; } } static bool checkThreeWayNarrowingConversion(Sema &S, QualType ToType, Expr *E, QualType FromType, SourceLocation Loc) { // Check for a narrowing implicit conversion. 
StandardConversionSequence SCS; SCS.setAsIdentityConversion(); SCS.setToType(0, FromType); SCS.setToType(1, ToType); if (const auto *ICE = dyn_cast(E)) SCS.Second = castKindToImplicitConversionKind(ICE->getCastKind()); APValue PreNarrowingValue; QualType PreNarrowingType; switch (SCS.getNarrowingKind(S.Context, E, PreNarrowingValue, PreNarrowingType, /*IgnoreFloatToIntegralConversion*/ true)) { case NK_Dependent_Narrowing: // Implicit conversion to a narrower type, but the expression is // value-dependent so we can't tell whether it's actually narrowing. case NK_Not_Narrowing: return false; case NK_Constant_Narrowing: // Implicit conversion to a narrower type, and the value is not a constant // expression. S.Diag(E->getBeginLoc(), diag::err_spaceship_argument_narrowing) << /*Constant*/ 1 << PreNarrowingValue.getAsString(S.Context, PreNarrowingType) << ToType; return true; case NK_Variable_Narrowing: // Implicit conversion to a narrower type, and the value is not a constant // expression. case NK_Type_Narrowing: S.Diag(E->getBeginLoc(), diag::err_spaceship_argument_narrowing) << /*Constant*/ 0 << FromType << ToType; // TODO: It's not a constant expression, but what if the user intended it // to be? Can we produce notes to help them figure out why it isn't? return true; } llvm_unreachable("unhandled case in switch"); } static QualType checkArithmeticOrEnumeralThreeWayCompare(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); // Dig out the original argument type and expression before implicit casts // were applied. These are the types/expressions we need to check the // [expr.spaceship] requirements against. ExprResult LHSStripped = LHS.get()->IgnoreParenImpCasts(); ExprResult RHSStripped = RHS.get()->IgnoreParenImpCasts(); QualType LHSStrippedType = LHSStripped.get()->getType(); QualType RHSStrippedType = RHSStripped.get()->getType(); // C++2a [expr.spaceship]p3: If one of the operands is of type bool and the // other is not, the program is ill-formed. if (LHSStrippedType->isBooleanType() != RHSStrippedType->isBooleanType()) { S.InvalidOperands(Loc, LHSStripped, RHSStripped); return QualType(); } // FIXME: Consider combining this with checkEnumArithmeticConversions. int NumEnumArgs = (int)LHSStrippedType->isEnumeralType() + RHSStrippedType->isEnumeralType(); if (NumEnumArgs == 1) { bool LHSIsEnum = LHSStrippedType->isEnumeralType(); QualType OtherTy = LHSIsEnum ? RHSStrippedType : LHSStrippedType; if (OtherTy->hasFloatingRepresentation()) { S.InvalidOperands(Loc, LHSStripped, RHSStripped); return QualType(); } } if (NumEnumArgs == 2) { // C++2a [expr.spaceship]p5: If both operands have the same enumeration // type E, the operator yields the result of converting the operands // to the underlying type of E and applying <=> to the converted operands. if (!S.Context.hasSameUnqualifiedType(LHSStrippedType, RHSStrippedType)) { S.InvalidOperands(Loc, LHS, RHS); return QualType(); } QualType IntType = LHSStrippedType->castAs()->getDecl()->getIntegerType(); assert(IntType->isArithmeticType()); // We can't use `CK_IntegralCast` when the underlying type is 'bool', so we // promote the boolean type, and all other promotable integer types, to // avoid this. 
if (IntType->isPromotableIntegerType()) IntType = S.Context.getPromotedIntegerType(IntType); LHS = S.ImpCastExprToType(LHS.get(), IntType, CK_IntegralCast); RHS = S.ImpCastExprToType(RHS.get(), IntType, CK_IntegralCast); LHSType = RHSType = IntType; } // C++2a [expr.spaceship]p4: If both operands have arithmetic types, the // usual arithmetic conversions are applied to the operands. QualType Type = S.UsualArithmeticConversions(LHS, RHS, Loc, Sema::ACK_Comparison); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (Type.isNull()) return S.InvalidOperands(Loc, LHS, RHS); Optional CCT = getComparisonCategoryForBuiltinCmp(Type); if (!CCT) return S.InvalidOperands(Loc, LHS, RHS); bool HasNarrowing = checkThreeWayNarrowingConversion( S, Type, LHS.get(), LHSType, LHS.get()->getBeginLoc()); HasNarrowing |= checkThreeWayNarrowingConversion(S, Type, RHS.get(), RHSType, RHS.get()->getBeginLoc()); if (HasNarrowing) return QualType(); assert(!Type.isNull() && "composite type for <=> has not been set"); return S.CheckComparisonCategoryType( *CCT, Loc, Sema::ComparisonCategoryUsage::OperatorInExpression); } static QualType checkArithmeticOrEnumeralCompare(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { if (Opc == BO_Cmp) return checkArithmeticOrEnumeralThreeWayCompare(S, LHS, RHS, Loc); // C99 6.5.8p3 / C99 6.5.9p4 QualType Type = S.UsualArithmeticConversions(LHS, RHS, Loc, Sema::ACK_Comparison); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (Type.isNull()) return S.InvalidOperands(Loc, LHS, RHS); assert(Type->isArithmeticType() || Type->isEnumeralType()); if (Type->isAnyComplexType() && BinaryOperator::isRelationalOp(Opc)) return S.InvalidOperands(Loc, LHS, RHS); // Check for comparisons of floating point operands using != and ==. if (Type->hasFloatingRepresentation()) S.CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc); // The result of comparisons is 'bool' in C++, 'int' in C. return S.Context.getLogicalOperationType(); } void Sema::CheckPtrComparisonWithNullChar(ExprResult &E, ExprResult &NullE) { if (!NullE.get()->getType()->isAnyPointerType()) return; int NullValue = PP.isMacroDefined("NULL") ? 0 : 1; if (!E.get()->getType()->isAnyPointerType() && E.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull) == Expr::NPCK_ZeroExpression) { if (const auto *CL = dyn_cast(E.get())) { if (CL->getValue() == 0) Diag(E.get()->getExprLoc(), diag::warn_pointer_compare) << NullValue << FixItHint::CreateReplacement(E.get()->getExprLoc(), NullValue ? "NULL" : "(void *)0"); } else if (const auto *CE = dyn_cast(E.get())) { TypeSourceInfo *TI = CE->getTypeInfoAsWritten(); QualType T = Context.getCanonicalType(TI->getType()).getUnqualifiedType(); if (T == Context.CharTy) Diag(E.get()->getExprLoc(), diag::warn_pointer_compare) << NullValue << FixItHint::CreateReplacement(E.get()->getExprLoc(), NullValue ? "NULL" : "(void *)0"); } } } // C99 6.5.8, C++ [expr.rel] QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { bool IsRelational = BinaryOperator::isRelationalOp(Opc); bool IsThreeWay = Opc == BO_Cmp; bool IsOrdered = IsRelational || IsThreeWay; auto IsAnyPointerType = [](ExprResult E) { QualType Ty = E.get()->getType(); return Ty->isPointerType() || Ty->isMemberPointerType(); }; // C++2a [expr.spaceship]p6: If at least one of the operands is of pointer // type, array-to-pointer, ..., conversions are performed on both operands to // bring them to their composite type. 
// Otherwise, all comparisons expect an rvalue, so convert to rvalue before // any type-related checks. if (!IsThreeWay || IsAnyPointerType(LHS) || IsAnyPointerType(RHS)) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); } else { LHS = DefaultLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = DefaultLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); } checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/true); if (!getLangOpts().CPlusPlus && BinaryOperator::isEqualityOp(Opc)) { CheckPtrComparisonWithNullChar(LHS, RHS); CheckPtrComparisonWithNullChar(RHS, LHS); } // Handle vector comparisons separately. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorCompareOperands(LHS, RHS, Loc, Opc); if (LHS.get()->getType()->isVLSTBuiltinType() || RHS.get()->getType()->isVLSTBuiltinType()) return CheckSizelessVectorCompareOperands(LHS, RHS, Loc, Opc); diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc); diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc); QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); if ((LHSType->isArithmeticType() || LHSType->isEnumeralType()) && (RHSType->isArithmeticType() || RHSType->isEnumeralType())) return checkArithmeticOrEnumeralCompare(*this, LHS, RHS, Loc, Opc); const Expr::NullPointerConstantKind LHSNullKind = LHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); const Expr::NullPointerConstantKind RHSNullKind = RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); bool LHSIsNull = LHSNullKind != Expr::NPCK_NotNull; bool RHSIsNull = RHSNullKind != Expr::NPCK_NotNull; auto computeResultTy = [&]() { if (Opc != BO_Cmp) return Context.getLogicalOperationType(); assert(getLangOpts().CPlusPlus); assert(Context.hasSameType(LHS.get()->getType(), RHS.get()->getType())); QualType CompositeTy = LHS.get()->getType(); assert(!CompositeTy->isReferenceType()); Optional CCT = getComparisonCategoryForBuiltinCmp(CompositeTy); if (!CCT) return InvalidOperands(Loc, LHS, RHS); if (CompositeTy->isPointerType() && LHSIsNull != RHSIsNull) { // P0946R0: Comparisons between a null pointer constant and an object // pointer result in std::strong_equality, which is ill-formed under // P1959R0. Diag(Loc, diag::err_typecheck_three_way_comparison_of_pointer_and_zero) << (LHSIsNull ? LHS.get()->getSourceRange() : RHS.get()->getSourceRange()); return QualType(); } return CheckComparisonCategoryType( *CCT, Loc, ComparisonCategoryUsage::OperatorInExpression); }; if (!IsOrdered && LHSIsNull != RHSIsNull) { bool IsEquality = Opc == BO_EQ; if (RHSIsNull) DiagnoseAlwaysNonNullPointer(LHS.get(), RHSNullKind, IsEquality, RHS.get()->getSourceRange()); else DiagnoseAlwaysNonNullPointer(RHS.get(), LHSNullKind, IsEquality, LHS.get()->getSourceRange()); } if (IsOrdered && LHSType->isFunctionPointerType() && RHSType->isFunctionPointerType()) { // Valid unless a relational comparison of function pointers bool IsError = Opc == BO_Cmp; auto DiagID = IsError ? diag::err_typecheck_ordered_comparison_of_function_pointers : getLangOpts().CPlusPlus ? 
diag::warn_typecheck_ordered_comparison_of_function_pointers : diag::ext_typecheck_ordered_comparison_of_function_pointers; Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); if (IsError) return QualType(); } if ((LHSType->isIntegerType() && !LHSIsNull) || (RHSType->isIntegerType() && !RHSIsNull)) { // Skip normal pointer conversion checks in this case; we have better // diagnostics for this below. } else if (getLangOpts().CPlusPlus) { // Equality comparison of a function pointer to a void pointer is invalid, // but we allow it as an extension. // FIXME: If we really want to allow this, should it be part of composite // pointer type computation so it works in conditionals too? if (!IsOrdered && ((LHSType->isFunctionPointerType() && RHSType->isVoidPointerType()) || (RHSType->isFunctionPointerType() && LHSType->isVoidPointerType()))) { // This is a gcc extension compatibility comparison. // In a SFINAE context, we treat this as a hard error to maintain // conformance with the C++ standard. diagnoseFunctionPointerToVoidComparison( *this, Loc, LHS, RHS, /*isError*/ (bool)isSFINAEContext()); if (isSFINAEContext()) return QualType(); RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return computeResultTy(); } // C++ [expr.eq]p2: // If at least one operand is a pointer [...] bring them to their // composite pointer type. // C++ [expr.spaceship]p6 // If at least one of the operands is of pointer type, [...] bring them // to their composite pointer type. // C++ [expr.rel]p2: // If both operands are pointers, [...] bring them to their composite // pointer type. // For <=>, the only valid non-pointer types are arrays and functions, and // we already decayed those, so this is really the same as the relational // comparison rule. if ((int)LHSType->isPointerType() + (int)RHSType->isPointerType() >= (IsOrdered ? 2 : 1) && (!LangOpts.ObjCAutoRefCount || !(LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()))) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); return computeResultTy(); } } else if (LHSType->isPointerType() && RHSType->isPointerType()) { // C99 6.5.8p2 // All of the following pointer-related warnings are GCC extensions, except // when handling null pointer constants. 
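// For example (illustrative), comparing an 'int *' against a 'float *' with
// '==' or '<' triggers the "distinct pointer types" diagnostics below, and
// comparing a function pointer against a 'void *' has its own diagnostic.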
QualType LCanPointeeTy = LHSType->castAs()->getPointeeType().getCanonicalType(); QualType RCanPointeeTy = RHSType->castAs()->getPointeeType().getCanonicalType(); // C99 6.5.9p2 and C99 6.5.8p2 if (Context.typesAreCompatible(LCanPointeeTy.getUnqualifiedType(), RCanPointeeTy.getUnqualifiedType())) { if (IsRelational) { // Pointers both need to point to complete or incomplete types if ((LCanPointeeTy->isIncompleteType() != RCanPointeeTy->isIncompleteType()) && !getLangOpts().C11) { Diag(Loc, diag::ext_typecheck_compare_complete_incomplete_pointers) << LHS.get()->getSourceRange() << RHS.get()->getSourceRange() << LHSType << RHSType << LCanPointeeTy->isIncompleteType() << RCanPointeeTy->isIncompleteType(); } } } else if (!IsRelational && (LCanPointeeTy->isVoidType() || RCanPointeeTy->isVoidType())) { // Valid unless comparison between non-null pointer and function pointer if ((LCanPointeeTy->isFunctionType() || RCanPointeeTy->isFunctionType()) && !LHSIsNull && !RHSIsNull) diagnoseFunctionPointerToVoidComparison(*this, Loc, LHS, RHS, /*isError*/false); } else { // Invalid diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LCanPointeeTy != RCanPointeeTy) { // Treat NULL constant as a special case in OpenCL. if (getLangOpts().OpenCL && !LHSIsNull && !RHSIsNull) { if (!LCanPointeeTy.isAddressSpaceOverlapping(RCanPointeeTy)) { Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSType << RHSType << 0 /* comparison */ << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } LangAS AddrSpaceL = LCanPointeeTy.getAddressSpace(); LangAS AddrSpaceR = RCanPointeeTy.getAddressSpace(); CastKind Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, Kind); else RHS = ImpCastExprToType(RHS.get(), LHSType, Kind); } return computeResultTy(); } if (getLangOpts().CPlusPlus) { // C++ [expr.eq]p4: // Two operands of type std::nullptr_t or one operand of type // std::nullptr_t and the other a null pointer constant compare equal. if (!IsOrdered && LHSIsNull && RHSIsNull) { if (LHSType->isNullPtrType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return computeResultTy(); } if (RHSType->isNullPtrType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return computeResultTy(); } } // Comparison of Objective-C pointers and block pointers against nullptr_t. // These aren't covered by the composite pointer type rules. if (!IsOrdered && RHSType->isNullPtrType() && (LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType())) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return computeResultTy(); } if (!IsOrdered && LHSType->isNullPtrType() && (RHSType->isObjCObjectPointerType() || RHSType->isBlockPointerType())) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return computeResultTy(); } if (IsRelational && ((LHSType->isNullPtrType() && RHSType->isPointerType()) || (RHSType->isNullPtrType() && LHSType->isPointerType()))) { // HACK: Relational comparison of nullptr_t against a pointer type is // invalid per DR583, but we allow it within std::less<> and friends, // since otherwise common uses of it break. // FIXME: Consider removing this hack once LWG fixes std::less<> and // friends to have std::nullptr_t overload candidates. 
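      // For illustration only (hypothetical user code): this hack keeps, e.g.,
      //   std::less<>{}(p, nullptr)
      // compiling, even though a bare 'p < nullptr' is rejected per DR583.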
DeclContext *DC = CurContext; if (isa(DC)) DC = DC->getParent(); if (auto *CTSD = dyn_cast(DC)) { if (CTSD->isInStdNamespace() && llvm::StringSwitch(CTSD->getName()) .Cases("less", "less_equal", "greater", "greater_equal", true) .Default(false)) { if (RHSType->isNullPtrType()) RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); else LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return computeResultTy(); } } } // C++ [expr.eq]p2: // If at least one operand is a pointer to member, [...] bring them to // their composite pointer type. if (!IsOrdered && (LHSType->isMemberPointerType() || RHSType->isMemberPointerType())) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); else return computeResultTy(); } } // Handle block pointer types. if (!IsOrdered && LHSType->isBlockPointerType() && RHSType->isBlockPointerType()) { QualType lpointee = LHSType->castAs()->getPointeeType(); QualType rpointee = RHSType->castAs()->getPointeeType(); if (!LHSIsNull && !RHSIsNull && !Context.typesAreCompatible(lpointee, rpointee)) { Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return computeResultTy(); } // Allow block pointers to be compared with null pointer constants. if (!IsOrdered && ((LHSType->isBlockPointerType() && RHSType->isPointerType()) || (LHSType->isPointerType() && RHSType->isBlockPointerType()))) { if (!LHSIsNull && !RHSIsNull) { if (!((RHSType->isPointerType() && RHSType->castAs() ->getPointeeType()->isVoidType()) || (LHSType->isPointerType() && LHSType->castAs() ->getPointeeType()->isVoidType()))) Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, RHSType->isPointerType() ? CK_BitCast : CK_AnyPointerToBlockPointerCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, LHSType->isPointerType() ? CK_BitCast : CK_AnyPointerToBlockPointerCast); return computeResultTy(); } if (LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()) { const PointerType *LPT = LHSType->getAs(); const PointerType *RPT = RHSType->getAs(); if (LPT || RPT) { bool LPtrToVoid = LPT ? LPT->getPointeeType()->isVoidType() : false; bool RPtrToVoid = RPT ? RPT->getPointeeType()->isVoidType() : false; if (!LPtrToVoid && !RPtrToVoid && !Context.typesAreCompatible(LHSType, RHSType)) { diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } // FIXME: If LPtrToVoid, we should presumably convert the LHS rather than // the RHS, but we have test coverage for this behavior. // FIXME: Consider using convertPointersToCompositeType in C++. if (LHSIsNull && !RHSIsNull) { Expr *E = LHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCConversion(SourceRange(), RHSType, E, CCK_ImplicitConversion); LHS = ImpCastExprToType(E, RHSType, RPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } else { Expr *E = RHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCConversion(SourceRange(), LHSType, E, CCK_ImplicitConversion, /*Diagnose=*/true, /*DiagnoseCFAudited=*/false, Opc); RHS = ImpCastExprToType(E, LHSType, LPT ? 
CK_BitCast :CK_CPointerToObjCPointerCast); } return computeResultTy(); } if (LHSType->isObjCObjectPointerType() && RHSType->isObjCObjectPointerType()) { if (!Context.areComparableObjCPointerTypes(LHSType, RHSType)) diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); if (isObjCObjectLiteral(LHS) || isObjCObjectLiteral(RHS)) diagnoseObjCLiteralComparison(*this, Loc, LHS, RHS, Opc); if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return computeResultTy(); } if (!IsOrdered && LHSType->isBlockPointerType() && RHSType->isBlockCompatibleObjCPointerType(Context)) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BlockPointerToObjCPointerCast); return computeResultTy(); } else if (!IsOrdered && LHSType->isBlockCompatibleObjCPointerType(Context) && RHSType->isBlockPointerType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BlockPointerToObjCPointerCast); return computeResultTy(); } } if ((LHSType->isAnyPointerType() && RHSType->isIntegerType()) || (LHSType->isIntegerType() && RHSType->isAnyPointerType())) { unsigned DiagID = 0; bool isError = false; if (LangOpts.DebuggerSupport) { // Under a debugger, allow the comparison of pointers to integers, // since users tend to want to compare addresses. } else if ((LHSIsNull && LHSType->isIntegerType()) || (RHSIsNull && RHSType->isIntegerType())) { if (IsOrdered) { isError = getLangOpts().CPlusPlus; DiagID = isError ? diag::err_typecheck_ordered_comparison_of_pointer_and_zero : diag::ext_typecheck_ordered_comparison_of_pointer_and_zero; } } else if (getLangOpts().CPlusPlus) { DiagID = diag::err_typecheck_comparison_of_pointer_integer; isError = true; } else if (IsOrdered) DiagID = diag::ext_typecheck_ordered_comparison_of_pointer_integer; else DiagID = diag::ext_typecheck_comparison_of_pointer_integer; if (DiagID) { Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); if (isError) return QualType(); } if (LHSType->isIntegerType()) LHS = ImpCastExprToType(LHS.get(), RHSType, LHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); else RHS = ImpCastExprToType(RHS.get(), LHSType, RHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); return computeResultTy(); } // Handle block pointers. if (!IsOrdered && RHSIsNull && LHSType->isBlockPointerType() && RHSType->isIntegerType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return computeResultTy(); } if (!IsOrdered && LHSIsNull && LHSType->isIntegerType() && RHSType->isBlockPointerType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return computeResultTy(); } if (getLangOpts().getOpenCLCompatibleVersion() >= 200) { if (LHSType->isClkEventT() && RHSType->isClkEventT()) { return computeResultTy(); } if (LHSType->isQueueT() && RHSType->isQueueT()) { return computeResultTy(); } if (LHSIsNull && RHSType->isQueueT()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return computeResultTy(); } if (LHSType->isQueueT() && RHSIsNull) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return computeResultTy(); } } return InvalidOperands(Loc, LHS, RHS); } // Return a signed ext_vector_type that is of identical size and number of // elements. For floating point vectors, return an integer type of identical // size and number of elements. 
// In the non ext_vector_type case, search from the largest type to the
// smallest type to avoid cases where long long == long, where long gets
// picked over long long.
QualType Sema::GetSignedVectorType(QualType V) {
  const VectorType *VTy = V->castAs<VectorType>();
  unsigned TypeSize = Context.getTypeSize(VTy->getElementType());

  if (isa<ExtVectorType>(VTy)) {
    if (VTy->isExtVectorBoolType())
      return Context.getExtVectorType(Context.BoolTy, VTy->getNumElements());
    if (TypeSize == Context.getTypeSize(Context.CharTy))
      return Context.getExtVectorType(Context.CharTy, VTy->getNumElements());
    if (TypeSize == Context.getTypeSize(Context.ShortTy))
      return Context.getExtVectorType(Context.ShortTy, VTy->getNumElements());
    if (TypeSize == Context.getTypeSize(Context.IntTy))
      return Context.getExtVectorType(Context.IntTy, VTy->getNumElements());
    if (TypeSize == Context.getTypeSize(Context.Int128Ty))
      return Context.getExtVectorType(Context.Int128Ty, VTy->getNumElements());
    if (TypeSize == Context.getTypeSize(Context.LongTy))
      return Context.getExtVectorType(Context.LongTy, VTy->getNumElements());
    assert(TypeSize == Context.getTypeSize(Context.LongLongTy) &&
           "Unhandled vector element size in vector compare");
    return Context.getExtVectorType(Context.LongLongTy, VTy->getNumElements());
  }

  if (TypeSize == Context.getTypeSize(Context.Int128Ty))
    return Context.getVectorType(Context.Int128Ty, VTy->getNumElements(),
                                 VectorType::GenericVector);
  if (TypeSize == Context.getTypeSize(Context.LongLongTy))
    return Context.getVectorType(Context.LongLongTy, VTy->getNumElements(),
                                 VectorType::GenericVector);
  if (TypeSize == Context.getTypeSize(Context.LongTy))
    return Context.getVectorType(Context.LongTy, VTy->getNumElements(),
                                 VectorType::GenericVector);
  if (TypeSize == Context.getTypeSize(Context.IntTy))
    return Context.getVectorType(Context.IntTy, VTy->getNumElements(),
                                 VectorType::GenericVector);
  if (TypeSize == Context.getTypeSize(Context.ShortTy))
    return Context.getVectorType(Context.ShortTy, VTy->getNumElements(),
                                 VectorType::GenericVector);
  assert(TypeSize == Context.getTypeSize(Context.CharTy) &&
         "Unhandled vector element size in vector compare");
  return Context.getVectorType(Context.CharTy, VTy->getNumElements(),
                               VectorType::GenericVector);
}

QualType Sema::GetSignedSizelessVectorType(QualType V) {
  const BuiltinType *VTy = V->castAs<BuiltinType>();
  assert(VTy->isSizelessBuiltinType() && "expected sizeless type");

  const QualType ETy = V->getSveEltType(Context);
  const auto TypeSize = Context.getTypeSize(ETy);

  const QualType IntTy = Context.getIntTypeForBitwidth(TypeSize, true);
  const llvm::ElementCount VecSize = Context.getBuiltinVectorTypeInfo(VTy).EC;
  return Context.getScalableVectorType(IntTy, VecSize.getKnownMinValue());
}

/// CheckVectorCompareOperands - vector comparisons are a clang extension that
/// operates on extended vector types. Instead of producing an IntTy result,
/// like a scalar comparison, a vector comparison produces a vector of integer
/// types.
QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS,
                                          SourceLocation Loc,
                                          BinaryOperatorKind Opc) {
  if (Opc == BO_Cmp) {
    Diag(Loc, diag::err_three_way_vector_comparison);
    return QualType();
  }

  // Check to make sure we're operating on vectors of the same type and width,
  // Allowing one side to be a scalar of element type.
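  // For illustration only (hypothetical user code using the ext_vector
  // extension): the scalar operand is splatted to the vector type, and the
  // result is a signed integer vector (typically int4 here), not a bool.
  //   typedef float float4 __attribute__((ext_vector_type(4)));
  //   float4 v;
  //   (void)(v > 0.0f);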
  QualType vType =
      CheckVectorOperands(LHS, RHS, Loc, /*isCompAssign*/ false,
                          /*AllowBothBool*/ true,
                          /*AllowBoolConversions*/ getLangOpts().ZVector,
                          /*AllowBooleanOperation*/ true,
                          /*ReportInvalid*/ true);
  if (vType.isNull())
    return vType;

  QualType LHSType = LHS.get()->getType();

  // Determine the return type of a vector compare. By default clang will
  // return a scalar for all vector compares except vector bool and vector
  // pixel. With the gcc compiler we will always return a vector type and with
  // the xl compiler we will always return a scalar type. This switch allows
  // choosing which behavior is preferred.
  if (getLangOpts().AltiVec) {
    switch (getLangOpts().getAltivecSrcCompat()) {
    case LangOptions::AltivecSrcCompatKind::Mixed:
      // If AltiVec, the comparison results in a numeric type, i.e.
      // bool for C++, int for C
      if (vType->castAs<VectorType>()->getVectorKind() ==
          VectorType::AltiVecVector)
        return Context.getLogicalOperationType();
      else
        Diag(Loc, diag::warn_deprecated_altivec_src_compat);
      break;
    case LangOptions::AltivecSrcCompatKind::GCC:
      // For GCC we always return the vector type.
      break;
    case LangOptions::AltivecSrcCompatKind::XL:
      return Context.getLogicalOperationType();
      break;
    }
  }

  // For non-floating point types, check for self-comparisons of the form
  // x == x, x != x, x < x, etc. These always evaluate to a constant, and
  // often indicate logic errors in the program.
  diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc);

  // Check for comparisons of floating point operands using != and ==.
  if (LHSType->hasFloatingRepresentation()) {
    assert(RHS.get()->getType()->hasFloatingRepresentation());
    CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc);
  }

  // Return a signed type for the vector.
  return GetSignedVectorType(vType);
}

QualType Sema::CheckSizelessVectorCompareOperands(ExprResult &LHS,
                                                  ExprResult &RHS,
                                                  SourceLocation Loc,
                                                  BinaryOperatorKind Opc) {
  if (Opc == BO_Cmp) {
    Diag(Loc, diag::err_three_way_vector_comparison);
    return QualType();
  }

  // Check to make sure we're operating on vectors of the same type and width,
  // Allowing one side to be a scalar of element type.
  QualType vType = CheckSizelessVectorOperands(
      LHS, RHS, Loc, /*isCompAssign*/ false, ACK_Comparison);

  if (vType.isNull())
    return vType;

  QualType LHSType = LHS.get()->getType();

  // For non-floating point types, check for self-comparisons of the form
  // x == x, x != x, x < x, etc. These always evaluate to a constant, and
  // often indicate logic errors in the program.
  diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc);

  // Check for comparisons of floating point operands using != and ==.
  if (LHSType->hasFloatingRepresentation()) {
    assert(RHS.get()->getType()->hasFloatingRepresentation());
    CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc);
  }

  const BuiltinType *LHSBuiltinTy = LHSType->getAs<BuiltinType>();
  const BuiltinType *RHSBuiltinTy = RHS.get()->getType()->getAs<BuiltinType>();

  if (LHSBuiltinTy && RHSBuiltinTy && LHSBuiltinTy->isSVEBool() &&
      RHSBuiltinTy->isSVEBool())
    return LHSType;

  // Return a signed type for the vector.
  return GetSignedSizelessVectorType(vType);
}

static void diagnoseXorMisusedAsPow(Sema &S, const ExprResult &XorLHS,
                                    const ExprResult &XorRHS,
                                    const SourceLocation Loc) {
  // Do not diagnose macros.
  if (Loc.isMacroID())
    return;

  // Do not diagnose if both LHS and RHS are macros.
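  // For illustration only (hypothetical user code): the checks below flag
  //   2 ^ 8    // evaluates to 10; fix-it suggests '1 << 8'
  //   10 ^ 3   // fix-it suggests '1e3'
  // as likely attempts at exponentiation.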
if (XorLHS.get()->getExprLoc().isMacroID() && XorRHS.get()->getExprLoc().isMacroID()) return; bool Negative = false; bool ExplicitPlus = false; const auto *LHSInt = dyn_cast(XorLHS.get()); const auto *RHSInt = dyn_cast(XorRHS.get()); if (!LHSInt) return; if (!RHSInt) { // Check negative literals. if (const auto *UO = dyn_cast(XorRHS.get())) { UnaryOperatorKind Opc = UO->getOpcode(); if (Opc != UO_Minus && Opc != UO_Plus) return; RHSInt = dyn_cast(UO->getSubExpr()); if (!RHSInt) return; Negative = (Opc == UO_Minus); ExplicitPlus = !Negative; } else { return; } } const llvm::APInt &LeftSideValue = LHSInt->getValue(); llvm::APInt RightSideValue = RHSInt->getValue(); if (LeftSideValue != 2 && LeftSideValue != 10) return; if (LeftSideValue.getBitWidth() != RightSideValue.getBitWidth()) return; CharSourceRange ExprRange = CharSourceRange::getCharRange( LHSInt->getBeginLoc(), S.getLocForEndOfToken(RHSInt->getLocation())); llvm::StringRef ExprStr = Lexer::getSourceText(ExprRange, S.getSourceManager(), S.getLangOpts()); CharSourceRange XorRange = CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc)); llvm::StringRef XorStr = Lexer::getSourceText(XorRange, S.getSourceManager(), S.getLangOpts()); // Do not diagnose if xor keyword/macro is used. if (XorStr == "xor") return; std::string LHSStr = std::string(Lexer::getSourceText( CharSourceRange::getTokenRange(LHSInt->getSourceRange()), S.getSourceManager(), S.getLangOpts())); std::string RHSStr = std::string(Lexer::getSourceText( CharSourceRange::getTokenRange(RHSInt->getSourceRange()), S.getSourceManager(), S.getLangOpts())); if (Negative) { RightSideValue = -RightSideValue; RHSStr = "-" + RHSStr; } else if (ExplicitPlus) { RHSStr = "+" + RHSStr; } StringRef LHSStrRef = LHSStr; StringRef RHSStrRef = RHSStr; // Do not diagnose literals with digit separators, binary, hexadecimal, octal // literals. if (LHSStrRef.startswith("0b") || LHSStrRef.startswith("0B") || RHSStrRef.startswith("0b") || RHSStrRef.startswith("0B") || LHSStrRef.startswith("0x") || LHSStrRef.startswith("0X") || RHSStrRef.startswith("0x") || RHSStrRef.startswith("0X") || (LHSStrRef.size() > 1 && LHSStrRef.startswith("0")) || (RHSStrRef.size() > 1 && RHSStrRef.startswith("0")) || LHSStrRef.contains('\'') || RHSStrRef.contains('\'')) return; bool SuggestXor = S.getLangOpts().CPlusPlus || S.getPreprocessor().isMacroDefined("xor"); const llvm::APInt XorValue = LeftSideValue ^ RightSideValue; int64_t RightSideIntValue = RightSideValue.getSExtValue(); if (LeftSideValue == 2 && RightSideIntValue >= 0) { std::string SuggestedExpr = "1 << " + RHSStr; bool Overflow = false; llvm::APInt One = (LeftSideValue - 1); llvm::APInt PowValue = One.sshl_ov(RightSideValue, Overflow); if (Overflow) { if (RightSideIntValue < 64) S.Diag(Loc, diag::warn_xor_used_as_pow_base) << ExprStr << toString(XorValue, 10, true) << ("1LL << " + RHSStr) << FixItHint::CreateReplacement(ExprRange, "1LL << " + RHSStr); else if (RightSideIntValue == 64) S.Diag(Loc, diag::warn_xor_used_as_pow) << ExprStr << toString(XorValue, 10, true); else return; } else { S.Diag(Loc, diag::warn_xor_used_as_pow_base_extra) << ExprStr << toString(XorValue, 10, true) << SuggestedExpr << toString(PowValue, 10, true) << FixItHint::CreateReplacement( ExprRange, (RightSideIntValue == 0) ? 
"1" : SuggestedExpr); } S.Diag(Loc, diag::note_xor_used_as_pow_silence) << ("0x2 ^ " + RHSStr) << SuggestXor; } else if (LeftSideValue == 10) { std::string SuggestedValue = "1e" + std::to_string(RightSideIntValue); S.Diag(Loc, diag::warn_xor_used_as_pow_base) << ExprStr << toString(XorValue, 10, true) << SuggestedValue << FixItHint::CreateReplacement(ExprRange, SuggestedValue); S.Diag(Loc, diag::note_xor_used_as_pow_silence) << ("0xA ^ " + RHSStr) << SuggestXor; } } QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { // Ensure that either both operands are of the same vector type, or // one operand is of a vector type and the other is of its element type. QualType vType = CheckVectorOperands(LHS, RHS, Loc, false, /*AllowBothBool*/ true, /*AllowBoolConversions*/ false, /*AllowBooleanOperation*/ false, /*ReportInvalid*/ false); if (vType.isNull()) return InvalidOperands(Loc, LHS, RHS); if (getLangOpts().OpenCL && getLangOpts().getOpenCLCompatibleVersion() < 120 && vType->hasFloatingRepresentation()) return InvalidOperands(Loc, LHS, RHS); // FIXME: The check for C++ here is for GCC compatibility. GCC rejects the // usage of the logical operators && and || with vectors in C. This // check could be notionally dropped. if (!getLangOpts().CPlusPlus && !(isa(vType->getAs()))) return InvalidLogicalVectorOperands(Loc, LHS, RHS); return GetSignedVectorType(LHS.get()->getType()); } QualType Sema::CheckMatrixElementwiseOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { if (!IsCompAssign) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = LHS.get()->getType().getUnqualifiedType(); QualType RHSType = RHS.get()->getType().getUnqualifiedType(); const MatrixType *LHSMatType = LHSType->getAs(); const MatrixType *RHSMatType = RHSType->getAs(); assert((LHSMatType || RHSMatType) && "At least one operand must be a matrix"); if (Context.hasSameType(LHSType, RHSType)) return LHSType; // Type conversion may change LHS/RHS. Keep copies to the original results, in // case we have to return InvalidOperands. 
ExprResult OriginalLHS = LHS; ExprResult OriginalRHS = RHS; if (LHSMatType && !RHSMatType) { RHS = tryConvertExprToType(RHS.get(), LHSMatType->getElementType()); if (!RHS.isInvalid()) return LHSType; return InvalidOperands(Loc, OriginalLHS, OriginalRHS); } if (!LHSMatType && RHSMatType) { LHS = tryConvertExprToType(LHS.get(), RHSMatType->getElementType()); if (!LHS.isInvalid()) return RHSType; return InvalidOperands(Loc, OriginalLHS, OriginalRHS); } return InvalidOperands(Loc, LHS, RHS); } QualType Sema::CheckMatrixMultiplyOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { if (!IsCompAssign) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); auto *LHSMatType = LHS.get()->getType()->getAs(); auto *RHSMatType = RHS.get()->getType()->getAs(); assert((LHSMatType || RHSMatType) && "At least one operand must be a matrix"); if (LHSMatType && RHSMatType) { if (LHSMatType->getNumColumns() != RHSMatType->getNumRows()) return InvalidOperands(Loc, LHS, RHS); if (!Context.hasSameType(LHSMatType->getElementType(), RHSMatType->getElementType())) return InvalidOperands(Loc, LHS, RHS); return Context.getConstantMatrixType(LHSMatType->getElementType(), LHSMatType->getNumRows(), RHSMatType->getNumColumns()); } return CheckMatrixElementwiseOperands(LHS, RHS, Loc, IsCompAssign); } static bool isLegalBoolVectorBinaryOp(BinaryOperatorKind Opc) { switch (Opc) { default: return false; case BO_And: case BO_AndAssign: case BO_Or: case BO_OrAssign: case BO_Xor: case BO_XorAssign: return true; } } inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); bool IsCompAssign = Opc == BO_AndAssign || Opc == BO_OrAssign || Opc == BO_XorAssign; bool LegalBoolVecOperator = isLegalBoolVectorBinaryOp(Opc); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LHS.get()->getType()->hasIntegerRepresentation() && RHS.get()->getType()->hasIntegerRepresentation()) return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/ true, /*AllowBoolConversions*/ getLangOpts().ZVector, /*AllowBooleanOperation*/ LegalBoolVecOperator, /*ReportInvalid*/ true); return InvalidOperands(Loc, LHS, RHS); } if (LHS.get()->getType()->isVLSTBuiltinType() || RHS.get()->getType()->isVLSTBuiltinType()) { if (LHS.get()->getType()->hasIntegerRepresentation() && RHS.get()->getType()->hasIntegerRepresentation()) return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign, ACK_BitwiseOp); return InvalidOperands(Loc, LHS, RHS); } if (LHS.get()->getType()->isVLSTBuiltinType() || RHS.get()->getType()->isVLSTBuiltinType()) { if (LHS.get()->getType()->hasIntegerRepresentation() && RHS.get()->getType()->hasIntegerRepresentation()) return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign, ACK_BitwiseOp); return InvalidOperands(Loc, LHS, RHS); } if (Opc == BO_And) diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc); if (LHS.get()->getType()->hasFloatingRepresentation() || RHS.get()->getType()->hasFloatingRepresentation()) return InvalidOperands(Loc, LHS, RHS); ExprResult LHSResult = LHS, RHSResult = RHS; QualType compType = UsualArithmeticConversions( LHSResult, RHSResult, Loc, IsCompAssign ? 
ACK_CompAssign : ACK_BitwiseOp); if (LHSResult.isInvalid() || RHSResult.isInvalid()) return QualType(); LHS = LHSResult.get(); RHS = RHSResult.get(); if (Opc == BO_Xor) diagnoseXorMisusedAsPow(*this, LHS, RHS, Loc); if (!compType.isNull() && compType->isIntegralOrUnscopedEnumerationType()) return compType; return InvalidOperands(Loc, LHS, RHS); } // C99 6.5.[13,14] inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { // Check vector operands differently. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorLogicalOperands(LHS, RHS, Loc); bool EnumConstantInBoolContext = false; for (const ExprResult &HS : {LHS, RHS}) { if (const auto *DREHS = dyn_cast(HS.get())) { const auto *ECDHS = dyn_cast(DREHS->getDecl()); if (ECDHS && ECDHS->getInitVal() != 0 && ECDHS->getInitVal() != 1) EnumConstantInBoolContext = true; } } if (EnumConstantInBoolContext) Diag(Loc, diag::warn_enum_constant_in_bool_context); // Diagnose cases where the user write a logical and/or but probably meant a // bitwise one. We do this when the LHS is a non-bool integer and the RHS // is a constant. if (!EnumConstantInBoolContext && LHS.get()->getType()->isIntegerType() && !LHS.get()->getType()->isBooleanType() && RHS.get()->getType()->isIntegerType() && !RHS.get()->isValueDependent() && // Don't warn in macros or template instantiations. !Loc.isMacroID() && !inTemplateInstantiation()) { // If the RHS can be constant folded, and if it constant folds to something // that isn't 0 or 1 (which indicate a potential logical operation that // happened to fold to true/false) then warn. // Parens on the RHS are ignored. Expr::EvalResult EVResult; if (RHS.get()->EvaluateAsInt(EVResult, Context)) { llvm::APSInt Result = EVResult.Val.getInt(); if ((getLangOpts().Bool && !RHS.get()->getType()->isBooleanType() && !RHS.get()->getExprLoc().isMacroID()) || (Result != 0 && Result != 1)) { Diag(Loc, diag::warn_logical_instead_of_bitwise) << RHS.get()->getSourceRange() << (Opc == BO_LAnd ? "&&" : "||"); // Suggest replacing the logical operator with the bitwise version Diag(Loc, diag::note_logical_instead_of_bitwise_change_operator) << (Opc == BO_LAnd ? "&" : "|") << FixItHint::CreateReplacement( SourceRange(Loc, getLocForEndOfToken(Loc)), Opc == BO_LAnd ? "&" : "|"); if (Opc == BO_LAnd) // Suggest replacing "Foo() && kNonZero" with "Foo()" Diag(Loc, diag::note_logical_instead_of_bitwise_remove_constant) << FixItHint::CreateRemoval( SourceRange(getLocForEndOfToken(LHS.get()->getEndLoc()), RHS.get()->getEndLoc())); } } } if (!Context.getLangOpts().CPlusPlus) { // OpenCL v1.1 s6.3.g: The logical operators and (&&), or (||) do // not operate on the built-in scalar and vector float types. if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { if (LHS.get()->getType()->isFloatingType() || RHS.get()->getType()->isFloatingType()) return InvalidOperands(Loc, LHS, RHS); } LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); if (!LHS.get()->getType()->isScalarType() || !RHS.get()->getType()->isScalarType()) return InvalidOperands(Loc, LHS, RHS); return Context.IntTy; } // The following is safe because we only use this method for // non-overloadable operands. // C++ [expr.log.and]p1 // C++ [expr.log.or]p1 // The operands are both contextually converted to type bool. 
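  // For illustration only (hypothetical user code): in C the result of '&&'
  // and '||' is 'int'; in C++ each operand is contextually converted and the
  // result is 'bool', e.g.
  //   int *p = nullptr; double d = 1.0;
  //   bool ok = p && d;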
ExprResult LHSRes = PerformContextuallyConvertToBool(LHS.get()); if (LHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); LHS = LHSRes; ExprResult RHSRes = PerformContextuallyConvertToBool(RHS.get()); if (RHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); RHS = RHSRes; // C++ [expr.log.and]p2 // C++ [expr.log.or]p2 // The result is a bool. return Context.BoolTy; } static bool IsReadonlyMessage(Expr *E, Sema &S) { const MemberExpr *ME = dyn_cast(E); if (!ME) return false; if (!isa(ME->getMemberDecl())) return false; ObjCMessageExpr *Base = dyn_cast( ME->getBase()->IgnoreImplicit()->IgnoreParenImpCasts()); if (!Base) return false; return Base->getMethodDecl() != nullptr; } /// Is the given expression (which must be 'const') a reference to a /// variable which was originally non-const, but which has become /// 'const' due to being captured within a block? enum NonConstCaptureKind { NCCK_None, NCCK_Block, NCCK_Lambda }; static NonConstCaptureKind isReferenceToNonConstCapture(Sema &S, Expr *E) { assert(E->isLValue() && E->getType().isConstQualified()); E = E->IgnoreParens(); // Must be a reference to a declaration from an enclosing scope. DeclRefExpr *DRE = dyn_cast(E); if (!DRE) return NCCK_None; if (!DRE->refersToEnclosingVariableOrCapture()) return NCCK_None; // The declaration must be a variable which is not declared 'const'. VarDecl *var = dyn_cast(DRE->getDecl()); if (!var) return NCCK_None; if (var->getType().isConstQualified()) return NCCK_None; assert(var->hasLocalStorage() && "capture added 'const' to non-local?"); // Decide whether the first capture was for a block or a lambda. DeclContext *DC = S.CurContext, *Prev = nullptr; // Decide whether the first capture was for a block or a lambda. while (DC) { // For init-capture, it is possible that the variable belongs to the // template pattern of the current context. if (auto *FD = dyn_cast(DC)) if (var->isInitCapture() && FD->getTemplateInstantiationPattern() == var->getDeclContext()) break; if (DC == var->getDeclContext()) break; Prev = DC; DC = DC->getParent(); } // Unless we have an init-capture, we've gone one step too far. if (!var->isInitCapture()) DC = Prev; return (isa(DC) ? NCCK_Block : NCCK_Lambda); } static bool IsTypeModifiable(QualType Ty, bool IsDereference) { Ty = Ty.getNonReferenceType(); if (IsDereference && Ty->isPointerType()) Ty = Ty->getPointeeType(); return !Ty.isConstQualified(); } // Update err_typecheck_assign_const and note_typecheck_assign_const // when this enum is changed. enum { ConstFunction, ConstVariable, ConstMember, ConstMethod, NestedConstMember, ConstUnknown, // Keep as last element }; /// Emit the "read-only variable not assignable" error and print notes to give /// more information about why the variable is not assignable, such as pointing /// to the declaration of a const variable, showing that a method is const, or /// that the function is returning a const reference. static void DiagnoseConstAssignment(Sema &S, const Expr *E, SourceLocation Loc) { SourceRange ExprRange = E->getSourceRange(); // Only emit one error on the first const found. All other consts will emit // a note to the error. bool DiagnosticEmitted = false; // Track if the current expression is the result of a dereference, and if the // next checked expression is the result of a dereference. bool IsDereference = false; bool NextIsDereference = false; // Loop to process MemberExpr chains. 
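  // For illustration only (hypothetical user code):
  //   struct S { const int x; } s;
  //   s.x = 1;   // error: assignment to const member; a note points at 'x'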
while (true) { IsDereference = NextIsDereference; E = E->IgnoreImplicit()->IgnoreParenImpCasts(); if (const MemberExpr *ME = dyn_cast(E)) { NextIsDereference = ME->isArrow(); const ValueDecl *VD = ME->getMemberDecl(); if (const FieldDecl *Field = dyn_cast(VD)) { // Mutable fields can be modified even if the class is const. if (Field->isMutable()) { assert(DiagnosticEmitted && "Expected diagnostic not emitted."); break; } if (!IsTypeModifiable(Field->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << false /*static*/ << Field << Field->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << false /*static*/ << Field << Field->getType() << Field->getSourceRange(); } E = ME->getBase(); continue; } else if (const VarDecl *VDecl = dyn_cast(VD)) { if (VDecl->getType().isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << true /*static*/ << VDecl << VDecl->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << true /*static*/ << VDecl << VDecl->getType() << VDecl->getSourceRange(); } // Static fields do not inherit constness from parents. break; } break; // End MemberExpr } else if (const ArraySubscriptExpr *ASE = dyn_cast(E)) { E = ASE->getBase()->IgnoreParenImpCasts(); continue; } else if (const ExtVectorElementExpr *EVE = dyn_cast(E)) { E = EVE->getBase()->IgnoreParenImpCasts(); continue; } break; } if (const CallExpr *CE = dyn_cast(E)) { // Function calls const FunctionDecl *FD = CE->getDirectCallee(); if (FD && !IsTypeModifiable(FD->getReturnType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstFunction << FD; DiagnosticEmitted = true; } S.Diag(FD->getReturnTypeSourceRange().getBegin(), diag::note_typecheck_assign_const) << ConstFunction << FD << FD->getReturnType() << FD->getReturnTypeSourceRange(); } } else if (const DeclRefExpr *DRE = dyn_cast(E)) { // Point to variable declaration. if (const ValueDecl *VD = DRE->getDecl()) { if (!IsTypeModifiable(VD->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstVariable << VD << VD->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstVariable << VD << VD->getType() << VD->getSourceRange(); } } } else if (isa(E)) { if (const DeclContext *DC = S.getFunctionLevelDeclContext()) { if (const CXXMethodDecl *MD = dyn_cast(DC)) { if (MD->isConst()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMethod << MD; DiagnosticEmitted = true; } S.Diag(MD->getLocation(), diag::note_typecheck_assign_const) << ConstMethod << MD << MD->getSourceRange(); } } } } if (DiagnosticEmitted) return; // Can't determine a more specific message, so display the generic error. 
S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstUnknown; } enum OriginalExprKind { OEK_Variable, OEK_Member, OEK_LValue }; static void DiagnoseRecursiveConstFields(Sema &S, const ValueDecl *VD, const RecordType *Ty, SourceLocation Loc, SourceRange Range, OriginalExprKind OEK, bool &DiagnosticEmitted) { std::vector RecordTypeList; RecordTypeList.push_back(Ty); unsigned NextToCheckIndex = 0; // We walk the record hierarchy breadth-first to ensure that we print // diagnostics in field nesting order. while (RecordTypeList.size() > NextToCheckIndex) { bool IsNested = NextToCheckIndex > 0; for (const FieldDecl *Field : RecordTypeList[NextToCheckIndex]->getDecl()->fields()) { // First, check every field for constness. QualType FieldTy = Field->getType(); if (FieldTy.isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << Range << NestedConstMember << OEK << VD << IsNested << Field; DiagnosticEmitted = true; } S.Diag(Field->getLocation(), diag::note_typecheck_assign_const) << NestedConstMember << IsNested << Field << FieldTy << Field->getSourceRange(); } // Then we append it to the list to check next in order. FieldTy = FieldTy.getCanonicalType(); if (const auto *FieldRecTy = FieldTy->getAs()) { if (!llvm::is_contained(RecordTypeList, FieldRecTy)) RecordTypeList.push_back(FieldRecTy); } } ++NextToCheckIndex; } } /// Emit an error for the case where a record we are trying to assign to has a /// const-qualified field somewhere in its hierarchy. static void DiagnoseRecursiveConstFields(Sema &S, const Expr *E, SourceLocation Loc) { QualType Ty = E->getType(); assert(Ty->isRecordType() && "lvalue was not record?"); SourceRange Range = E->getSourceRange(); const RecordType *RTy = Ty.getCanonicalType()->getAs(); bool DiagEmitted = false; if (const MemberExpr *ME = dyn_cast(E)) DiagnoseRecursiveConstFields(S, ME->getMemberDecl(), RTy, Loc, Range, OEK_Member, DiagEmitted); else if (const DeclRefExpr *DRE = dyn_cast(E)) DiagnoseRecursiveConstFields(S, DRE->getDecl(), RTy, Loc, Range, OEK_Variable, DiagEmitted); else DiagnoseRecursiveConstFields(S, nullptr, RTy, Loc, Range, OEK_LValue, DiagEmitted); if (!DiagEmitted) DiagnoseConstAssignment(S, E, Loc); } /// CheckForModifiableLvalue - Verify that E is a modifiable lvalue. If not, /// emit an error and return true. If so, return false. static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) { assert(!E->hasPlaceholderType(BuiltinType::PseudoObject)); S.CheckShadowingDeclModification(E, Loc); SourceLocation OrigLoc = Loc; Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context, &Loc); if (IsLV == Expr::MLV_ClassTemporary && IsReadonlyMessage(E, S)) IsLV = Expr::MLV_InvalidMessageExpression; if (IsLV == Expr::MLV_Valid) return false; unsigned DiagID = 0; bool NeedType = false; switch (IsLV) { // C99 6.5.16p2 case Expr::MLV_ConstQualified: // Use a specialized diagnostic when we're assigning to an object // from an enclosing function or block. if (NonConstCaptureKind NCCK = isReferenceToNonConstCapture(S, E)) { if (NCCK == NCCK_Block) DiagID = diag::err_block_decl_ref_not_modifiable_lvalue; else DiagID = diag::err_lambda_decl_ref_not_modifiable_lvalue; break; } // In ARC, use some specialized diagnostics for occasions where we // infer 'const'. These are always pseudo-strong variables. 
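    // For illustration only (hypothetical Objective-C ARC user code):
    //   - (void)reset { self = nil; }   // rejected below: 'self' is
    //                                   // pseudo-strong outside init methods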
if (S.getLangOpts().ObjCAutoRefCount) { DeclRefExpr *declRef = dyn_cast(E->IgnoreParenCasts()); if (declRef && isa(declRef->getDecl())) { VarDecl *var = cast(declRef->getDecl()); // Use the normal diagnostic if it's pseudo-__strong but the // user actually wrote 'const'. if (var->isARCPseudoStrong() && (!var->getTypeSourceInfo() || !var->getTypeSourceInfo()->getType().isConstQualified())) { // There are three pseudo-strong cases: // - self ObjCMethodDecl *method = S.getCurMethodDecl(); if (method && var == method->getSelfDecl()) { DiagID = method->isClassMethod() ? diag::err_typecheck_arc_assign_self_class_method : diag::err_typecheck_arc_assign_self; // - Objective-C externally_retained attribute. } else if (var->hasAttr() || isa(var)) { DiagID = diag::err_typecheck_arc_assign_externally_retained; // - fast enumeration variables } else { DiagID = diag::err_typecheck_arr_assign_enumeration; } SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; // We need to preserve the AST regardless, so migration tool // can do its job. return false; } } } // If none of the special cases above are triggered, then this is a // simple const assignment. if (DiagID == 0) { DiagnoseConstAssignment(S, E, Loc); return true; } break; case Expr::MLV_ConstAddrSpace: DiagnoseConstAssignment(S, E, Loc); return true; case Expr::MLV_ConstQualifiedField: DiagnoseRecursiveConstFields(S, E, Loc); return true; case Expr::MLV_ArrayType: case Expr::MLV_ArrayTemporary: DiagID = diag::err_typecheck_array_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_NotObjectType: DiagID = diag::err_typecheck_non_object_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_LValueCast: DiagID = diag::err_typecheck_lvalue_casts_not_supported; break; case Expr::MLV_Valid: llvm_unreachable("did not take early return for MLV_Valid"); case Expr::MLV_InvalidExpression: case Expr::MLV_MemberFunction: case Expr::MLV_ClassTemporary: DiagID = diag::err_typecheck_expression_not_modifiable_lvalue; break; case Expr::MLV_IncompleteType: case Expr::MLV_IncompleteVoidType: return S.RequireCompleteType(Loc, E->getType(), diag::err_typecheck_incomplete_type_not_modifiable_lvalue, E); case Expr::MLV_DuplicateVectorComponents: DiagID = diag::err_typecheck_duplicate_vector_components_not_mlvalue; break; case Expr::MLV_NoSetterProperty: llvm_unreachable("readonly properties should be processed differently"); case Expr::MLV_InvalidMessageExpression: DiagID = diag::err_readonly_message_assignment; break; case Expr::MLV_SubObjCPropertySetting: DiagID = diag::err_no_subobject_property_setting; break; } SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); if (NeedType) S.Diag(Loc, DiagID) << E->getType() << E->getSourceRange() << Assign; else S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; return true; } static void CheckIdentityFieldAssignment(Expr *LHSExpr, Expr *RHSExpr, SourceLocation Loc, Sema &Sema) { if (Sema.inTemplateInstantiation()) return; if (Sema.isUnevaluatedContext()) return; if (Loc.isInvalid() || Loc.isMacroID()) return; if (LHSExpr->getExprLoc().isMacroID() || RHSExpr->getExprLoc().isMacroID()) return; // C / C++ fields MemberExpr *ML = dyn_cast(LHSExpr); MemberExpr *MR = dyn_cast(RHSExpr); if (ML && MR) { if (!(isa(ML->getBase()) && isa(MR->getBase()))) return; const ValueDecl *LHSDecl = cast(ML->getMemberDecl()->getCanonicalDecl()); const ValueDecl *RHSDecl = cast(MR->getMemberDecl()->getCanonicalDecl()); if 
(LHSDecl != RHSDecl) return; if (LHSDecl->getType().isVolatileQualified()) return; if (const ReferenceType *RefTy = LHSDecl->getType()->getAs()) if (RefTy->getPointeeType().isVolatileQualified()) return; Sema.Diag(Loc, diag::warn_identity_field_assign) << 0; } // Objective-C instance variables ObjCIvarRefExpr *OL = dyn_cast(LHSExpr); ObjCIvarRefExpr *OR = dyn_cast(RHSExpr); if (OL && OR && OL->getDecl() == OR->getDecl()) { DeclRefExpr *RL = dyn_cast(OL->getBase()->IgnoreImpCasts()); DeclRefExpr *RR = dyn_cast(OR->getBase()->IgnoreImpCasts()); if (RL && RR && RL->getDecl() == RR->getDecl()) Sema.Diag(Loc, diag::warn_identity_field_assign) << 1; } } // C99 6.5.16.1 QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS, SourceLocation Loc, QualType CompoundType, BinaryOperatorKind Opc) { assert(!LHSExpr->hasPlaceholderType(BuiltinType::PseudoObject)); // Verify that LHS is a modifiable lvalue, and emit error if not. if (CheckForModifiableLvalue(LHSExpr, Loc, *this)) return QualType(); QualType LHSType = LHSExpr->getType(); QualType RHSType = CompoundType.isNull() ? RHS.get()->getType() : CompoundType; // OpenCL v1.2 s6.1.1.1 p2: // The half data type can only be used to declare a pointer to a buffer that // contains half values if (getLangOpts().OpenCL && !getOpenCLOptions().isAvailableOption("cl_khr_fp16", getLangOpts()) && LHSType->isHalfType()) { Diag(Loc, diag::err_opencl_half_load_store) << 1 << LHSType.getUnqualifiedType(); return QualType(); } AssignConvertType ConvTy; if (CompoundType.isNull()) { Expr *RHSCheck = RHS.get(); CheckIdentityFieldAssignment(LHSExpr, RHSCheck, Loc, *this); QualType LHSTy(LHSType); ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS); if (RHS.isInvalid()) return QualType(); // Special case of NSObject attributes on c-style pointer types. if (ConvTy == IncompatiblePointer && ((Context.isObjCNSObjectType(LHSType) && RHSType->isObjCObjectPointerType()) || (Context.isObjCNSObjectType(RHSType) && LHSType->isObjCObjectPointerType()))) ConvTy = Compatible; if (ConvTy == Compatible && LHSType->isObjCObjectType()) Diag(Loc, diag::err_objc_object_assignment) << LHSType; // If the RHS is a unary plus or minus, check to see if they = and + are // right next to each other. If so, the user may have typo'd "x =+ 4" // instead of "x += 4". if (ImplicitCastExpr *ICE = dyn_cast(RHSCheck)) RHSCheck = ICE->getSubExpr(); if (UnaryOperator *UO = dyn_cast(RHSCheck)) { if ((UO->getOpcode() == UO_Plus || UO->getOpcode() == UO_Minus) && Loc.isFileID() && UO->getOperatorLoc().isFileID() && // Only if the two operators are exactly adjacent. Loc.getLocWithOffset(1) == UO->getOperatorLoc() && // And there is a space or other character before the subexpr of the // unary +/-. We don't want to warn on "x=-1". Loc.getLocWithOffset(2) != UO->getSubExpr()->getBeginLoc() && UO->getSubExpr()->getBeginLoc().isFileID()) { Diag(Loc, diag::warn_not_compound_assign) << (UO->getOpcode() == UO_Plus ? "+" : "-") << SourceRange(UO->getOperatorLoc(), UO->getOperatorLoc()); } } if (ConvTy == Compatible) { if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong) { // Warn about retain cycles where a block captures the LHS, but // not if the LHS is a simple variable into which the block is // being stored...unless that variable can be captured by reference! 
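        // For illustration only (hypothetical Objective-C user code):
        //   _handler = ^{ [self fire]; };   // may warn: a block capturing
        //                                   // 'self' stored into a strong
        //                                   // ivar can form a retain cycle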
const Expr *InnerLHS = LHSExpr->IgnoreParenCasts(); const DeclRefExpr *DRE = dyn_cast(InnerLHS); if (!DRE || DRE->getDecl()->hasAttr()) checkRetainCycles(LHSExpr, RHS.get()); } if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong || LHSType.isNonWeakInMRRWithObjCWeak(Context)) { // It is safe to assign a weak reference into a strong variable. // Although this code can still have problems: // id x = self.weakProp; // id y = self.weakProp; // we do not warn to warn spuriously when 'x' and 'y' are on separate // paths through the function. This should be revisited if // -Wrepeated-use-of-weak is made flow-sensitive. // For ObjCWeak only, we do not warn if the assign is to a non-weak // variable, which will be valid for the current autorelease scope. if (!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, RHS.get()->getBeginLoc())) getCurFunction()->markSafeWeakUse(RHS.get()); } else if (getLangOpts().ObjCAutoRefCount || getLangOpts().ObjCWeak) { checkUnsafeExprAssigns(Loc, LHSExpr, RHS.get()); } } } else { // Compound assignment "x += y" ConvTy = CheckAssignmentConstraints(Loc, LHSType, RHSType); } if (DiagnoseAssignmentResult(ConvTy, Loc, LHSType, RHSType, RHS.get(), AA_Assigning)) return QualType(); CheckForNullPointerDereference(*this, LHSExpr); if (getLangOpts().CPlusPlus20 && LHSType.isVolatileQualified()) { if (CompoundType.isNull()) { // C++2a [expr.ass]p5: // A simple-assignment whose left operand is of a volatile-qualified // type is deprecated unless the assignment is either a discarded-value // expression or an unevaluated operand ExprEvalContexts.back().VolatileAssignmentLHSs.push_back(LHSExpr); } else { // C++20 [expr.ass]p6: // [Compound-assignment] expressions are deprecated if E1 has // volatile-qualified type and op is not one of the bitwise // operators |, &, ˆ. switch (Opc) { case BO_OrAssign: case BO_AndAssign: case BO_XorAssign: break; default: Diag(Loc, diag::warn_deprecated_compound_assign_volatile) << LHSType; } } } // C11 6.5.16p3: The type of an assignment expression is the type of the // left operand would have after lvalue conversion. // C11 6.3.2.1p2: ...this is called lvalue conversion. If the lvalue has // qualified type, the value has the unqualified version of the type of the // lvalue; additionally, if the lvalue has atomic type, the value has the // non-atomic version of the type of the lvalue. // C++ 5.17p1: the type of the assignment expression is that of its left // operand. return getLangOpts().CPlusPlus ? LHSType : LHSType.getAtomicUnqualifiedType(); } // Only ignore explicit casts to void. static bool IgnoreCommaOperand(const Expr *E) { E = E->IgnoreParens(); if (const CastExpr *CE = dyn_cast(E)) { if (CE->getCastKind() == CK_ToVoid) { return true; } // static_cast on a dependent type will not show up as CK_ToVoid. if (CE->getCastKind() == CK_Dependent && E->getType()->isVoidType() && CE->getSubExpr()->getType()->isDependentType()) { return true; } } return false; } // Look for instances where it is likely the comma operator is confused with // another operator. There is an explicit list of acceptable expressions for // the left hand side of the comma operator, otherwise emit a warning. void Sema::DiagnoseCommaOperator(const Expr *LHS, SourceLocation Loc) { // No warnings in macros if (Loc.isMacroID()) return; // Don't warn in template instantiations. 
if (inTemplateInstantiation()) return; // Scope isn't fine-grained enough to explicitly list the specific cases, so // instead, skip more than needed, then call back into here with the // CommaVisitor in SemaStmt.cpp. // The listed locations are the initialization and increment portions // of a for loop. The additional checks are on the condition of // if statements, do/while loops, and for loops. // Differences in scope flags for C89 mode requires the extra logic. const unsigned ForIncrementFlags = getLangOpts().C99 || getLangOpts().CPlusPlus ? Scope::ControlScope | Scope::ContinueScope | Scope::BreakScope : Scope::ContinueScope | Scope::BreakScope; const unsigned ForInitFlags = Scope::ControlScope | Scope::DeclScope; const unsigned ScopeFlags = getCurScope()->getFlags(); if ((ScopeFlags & ForIncrementFlags) == ForIncrementFlags || (ScopeFlags & ForInitFlags) == ForInitFlags) return; // If there are multiple comma operators used together, get the RHS of the // of the comma operator as the LHS. while (const BinaryOperator *BO = dyn_cast(LHS)) { if (BO->getOpcode() != BO_Comma) break; LHS = BO->getRHS(); } // Only allow some expressions on LHS to not warn. if (IgnoreCommaOperand(LHS)) return; Diag(Loc, diag::warn_comma_operator); Diag(LHS->getBeginLoc(), diag::note_cast_to_void) << LHS->getSourceRange() << FixItHint::CreateInsertion(LHS->getBeginLoc(), LangOpts.CPlusPlus ? "static_cast(" : "(void)(") << FixItHint::CreateInsertion(PP.getLocForEndOfToken(LHS->getEndLoc()), ")"); } // C99 6.5.17 static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { LHS = S.CheckPlaceholderExpr(LHS.get()); RHS = S.CheckPlaceholderExpr(RHS.get()); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // C's comma performs lvalue conversion (C99 6.3.2.1) on both its // operands, but not unary promotions. // C++'s comma does not do any conversions at all (C++ [expr.comma]p1). // So we treat the LHS as a ignored value, and in C++ we allow the // containing site to determine what should be done with the RHS. LHS = S.IgnoredValueConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); S.DiagnoseUnusedExprResult(LHS.get(), diag::warn_unused_comma_left_operand); if (!S.getLangOpts().CPlusPlus) { RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); if (!RHS.get()->getType()->isVoidType()) S.RequireCompleteType(Loc, RHS.get()->getType(), diag::err_incomplete_type); } if (!S.getDiagnostics().isIgnored(diag::warn_comma_operator, Loc)) S.DiagnoseCommaOperator(LHS.get(), Loc); return RHS.get()->getType(); } /// CheckIncrementDecrementOperand - unlike most "Check" methods, this routine /// doesn't need to call UsualUnaryConversions or UsualArithmeticConversions. static QualType CheckIncrementDecrementOperand(Sema &S, Expr *Op, ExprValueKind &VK, ExprObjectKind &OK, SourceLocation OpLoc, bool IsInc, bool IsPrefix) { if (Op->isTypeDependent()) return S.Context.DependentTy; QualType ResType = Op->getType(); // Atomic types can be used for increment / decrement where the non-atomic // versions can, so ignore the _Atomic() specifier for the purpose of // checking. if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); assert(!ResType.isNull() && "no type for increment/decrement expression"); if (S.getLangOpts().CPlusPlus && ResType->isBooleanType()) { // Decrement of bool is not allowed. 
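    // For illustration only (hypothetical C++ user code):
    //   bool b = false;
    //   --b;   // error: bool cannot be decremented
    //   ++b;   // deprecated before C++17; an extension diagnostic thereafter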
if (!IsInc) { S.Diag(OpLoc, diag::err_decrement_bool) << Op->getSourceRange(); return QualType(); } // Increment of bool sets it to true, but is deprecated. S.Diag(OpLoc, S.getLangOpts().CPlusPlus17 ? diag::ext_increment_bool : diag::warn_increment_bool) << Op->getSourceRange(); } else if (S.getLangOpts().CPlusPlus && ResType->isEnumeralType()) { // Error on enum increments and decrements in C++ mode S.Diag(OpLoc, diag::err_increment_decrement_enum) << IsInc << ResType; return QualType(); } else if (ResType->isRealType()) { // OK! } else if (ResType->isPointerType()) { // C99 6.5.2.4p2, 6.5.6p2 if (!checkArithmeticOpPointerOperand(S, OpLoc, Op)) return QualType(); } else if (ResType->isObjCObjectPointerType()) { // On modern runtimes, ObjC pointer arithmetic is forbidden. // Otherwise, we just need a complete type. if (checkArithmeticIncompletePointerType(S, OpLoc, Op) || checkArithmeticOnObjCPointer(S, OpLoc, Op)) return QualType(); } else if (ResType->isAnyComplexType()) { // C99 does not support ++/-- on complex types, we allow as an extension. S.Diag(OpLoc, diag::ext_integer_increment_complex) << ResType << Op->getSourceRange(); } else if (ResType->isPlaceholderType()) { ExprResult PR = S.CheckPlaceholderExpr(Op); if (PR.isInvalid()) return QualType(); return CheckIncrementDecrementOperand(S, PR.get(), VK, OK, OpLoc, IsInc, IsPrefix); } else if (S.getLangOpts().AltiVec && ResType->isVectorType()) { // OK! ( C/C++ Language Extensions for CBEA(Version 2.6) 10.3 ) } else if (S.getLangOpts().ZVector && ResType->isVectorType() && (ResType->castAs()->getVectorKind() != VectorType::AltiVecBool)) { // The z vector extensions allow ++ and -- for non-bool vectors. } else if(S.getLangOpts().OpenCL && ResType->isVectorType() && ResType->castAs()->getElementType()->isIntegerType()) { // OpenCL V1.2 6.3 says dec/inc ops operate on integer vector types. } else { S.Diag(OpLoc, diag::err_typecheck_illegal_increment_decrement) << ResType << int(IsInc) << Op->getSourceRange(); return QualType(); } // At this point, we know we have a real, complex or pointer type. // Now make sure the operand is a modifiable lvalue. if (CheckForModifiableLvalue(Op, OpLoc, S)) return QualType(); if (S.getLangOpts().CPlusPlus20 && ResType.isVolatileQualified()) { // C++2a [expr.pre.inc]p1, [expr.post.inc]p1: // An operand with volatile-qualified type is deprecated S.Diag(OpLoc, diag::warn_deprecated_increment_decrement_volatile) << IsInc << ResType; } // In C++, a prefix increment is the same type as the operand. Otherwise // (in C or with postfix), the increment is the unqualified type of the // operand. if (IsPrefix && S.getLangOpts().CPlusPlus) { VK = VK_LValue; OK = Op->getObjectKind(); return ResType; } else { VK = VK_PRValue; return ResType.getUnqualifiedType(); } } /// getPrimaryDecl - Helper function for CheckAddressOfOperand(). /// This routine allows us to typecheck complex/recursive expressions /// where the declaration is needed for type checking. We only need to /// handle cases when the expression references a function designator /// or is an lvalue. Here are some examples: /// - &(x) => x /// - &*****f => f for f a function designator. /// - &s.xx => s /// - &s.zz[1].yy -> s, if zz is an array /// - *(x + 1) -> x, if x is an array /// - &"123"[2] -> 0 /// - & __real__ x -> x /// /// FIXME: We don't recurse to the RHS of a comma, nor handle pointers to /// members. 
static ValueDecl *getPrimaryDecl(Expr *E) { switch (E->getStmtClass()) { case Stmt::DeclRefExprClass: return cast(E)->getDecl(); case Stmt::MemberExprClass: // If this is an arrow operator, the address is an offset from // the base's value, so the object the base refers to is // irrelevant. if (cast(E)->isArrow()) return nullptr; // Otherwise, the expression refers to a part of the base return getPrimaryDecl(cast(E)->getBase()); case Stmt::ArraySubscriptExprClass: { // FIXME: This code shouldn't be necessary! We should catch the implicit // promotion of register arrays earlier. Expr* Base = cast(E)->getBase(); if (ImplicitCastExpr* ICE = dyn_cast(Base)) { if (ICE->getSubExpr()->getType()->isArrayType()) return getPrimaryDecl(ICE->getSubExpr()); } return nullptr; } case Stmt::UnaryOperatorClass: { UnaryOperator *UO = cast(E); switch(UO->getOpcode()) { case UO_Real: case UO_Imag: case UO_Extension: return getPrimaryDecl(UO->getSubExpr()); default: return nullptr; } } case Stmt::ParenExprClass: return getPrimaryDecl(cast(E)->getSubExpr()); case Stmt::ImplicitCastExprClass: // If the result of an implicit cast is an l-value, we care about // the sub-expression; otherwise, the result here doesn't matter. return getPrimaryDecl(cast(E)->getSubExpr()); case Stmt::CXXUuidofExprClass: return cast(E)->getGuidDecl(); default: return nullptr; } } namespace { enum { AO_Bit_Field = 0, AO_Vector_Element = 1, AO_Property_Expansion = 2, AO_Register_Variable = 3, AO_Matrix_Element = 4, AO_No_Error = 5 }; } /// Diagnose invalid operand for address of operations. /// /// \param Type The type of operand which cannot have its address taken. static void diagnoseAddressOfInvalidType(Sema &S, SourceLocation Loc, Expr *E, unsigned Type) { S.Diag(Loc, diag::err_typecheck_address_of) << Type << E->getSourceRange(); } /// CheckAddressOfOperand - The operand of & must be either a function /// designator or an lvalue designating an object. If it is an lvalue, the /// object cannot be declared with storage class register or be a bit field. /// Note: The usual conversions are *not* applied to the operand of the & /// operator (C99 6.3.2.1p[2-4]), and its result is never an lvalue. /// In C++, the operand might be an overloaded function name, in which case /// we allow the '&' but retain the overloaded-function type. 
QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) { if (const BuiltinType *PTy = OrigOp.get()->getType()->getAsPlaceholderType()){ if (PTy->getKind() == BuiltinType::Overload) { Expr *E = OrigOp.get()->IgnoreParens(); if (!isa(E)) { assert(cast(E)->getOpcode() == UO_AddrOf); Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof_addrof_function) << OrigOp.get()->getSourceRange(); return QualType(); } OverloadExpr *Ovl = cast(E); if (isa(Ovl)) if (!ResolveSingleFunctionTemplateSpecialization(Ovl)) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } return Context.OverloadTy; } if (PTy->getKind() == BuiltinType::UnknownAny) return Context.UnknownAnyTy; if (PTy->getKind() == BuiltinType::BoundMember) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } OrigOp = CheckPlaceholderExpr(OrigOp.get()); if (OrigOp.isInvalid()) return QualType(); } if (OrigOp.get()->isTypeDependent()) return Context.DependentTy; assert(!OrigOp.get()->hasPlaceholderType()); // Make sure to ignore parentheses in subsequent checks Expr *op = OrigOp.get()->IgnoreParens(); // In OpenCL captures for blocks called as lambda functions // are located in the private address space. Blocks used in // enqueue_kernel can be located in a different address space // depending on a vendor implementation. Thus preventing // taking an address of the capture to avoid invalid AS casts. if (LangOpts.OpenCL) { auto* VarRef = dyn_cast(op); if (VarRef && VarRef->refersToEnclosingVariableOrCapture()) { Diag(op->getExprLoc(), diag::err_opencl_taking_address_capture); return QualType(); } } if (getLangOpts().C99) { // Implement C99-only parts of addressof rules. if (UnaryOperator* uOp = dyn_cast(op)) { if (uOp->getOpcode() == UO_Deref) // Per C99 6.5.3.2, the address of a deref always returns a valid result // (assuming the deref expression is valid). return uOp->getSubExpr()->getType(); } // Technically, there should be a check for array subscript // expressions here, but the result of one is always an lvalue anyway. } ValueDecl *dcl = getPrimaryDecl(op); if (auto *FD = dyn_cast_or_null(dcl)) if (!checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, op->getBeginLoc())) return QualType(); Expr::LValueClassification lval = op->ClassifyLValue(Context); unsigned AddressOfError = AO_No_Error; if (lval == Expr::LV_ClassTemporary || lval == Expr::LV_ArrayTemporary) { bool sfinae = (bool)isSFINAEContext(); Diag(OpLoc, isSFINAEContext() ? diag::err_typecheck_addrof_temporary : diag::ext_typecheck_addrof_temporary) << op->getType() << op->getSourceRange(); if (sfinae) return QualType(); // Materialize the temporary as an lvalue so that we can take its address. OrigOp = op = CreateMaterializeTemporaryExpr(op->getType(), OrigOp.get(), true); } else if (isa(op)) { return Context.getPointerType(op->getType()); } else if (lval == Expr::LV_MemberFunction) { // If it's an instance method, make a member pointer. // The expression must have exactly the form &A::foo. // If the underlying expression isn't a decl ref, give up. if (!isa(op)) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } DeclRefExpr *DRE = cast(op); CXXMethodDecl *MD = cast(DRE->getDecl()); // The id-expression was parenthesized. 
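    // An illustrative example (not from the original source): '&(A::foo)'
    // reaches this point with the DeclRefExpr wrapped in parentheses and is
    // diagnosed below; only the exact form '&A::foo' yields a member pointer.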
if (OrigOp.get() != DRE) { Diag(OpLoc, diag::err_parens_pointer_member_function) << OrigOp.get()->getSourceRange(); // The method was named without a qualifier. } else if (!DRE->getQualifier()) { if (MD->getParent()->getName().empty()) Diag(OpLoc, diag::err_unqualified_pointer_member_function) << op->getSourceRange(); else { SmallString<32> Str; StringRef Qual = (MD->getParent()->getName() + "::").toStringRef(Str); Diag(OpLoc, diag::err_unqualified_pointer_member_function) << op->getSourceRange() << FixItHint::CreateInsertion(op->getSourceRange().getBegin(), Qual); } } // Taking the address of a dtor is illegal per C++ [class.dtor]p2. if (isa(MD)) Diag(OpLoc, diag::err_typecheck_addrof_dtor) << op->getSourceRange(); QualType MPTy = Context.getMemberPointerType( op->getType(), Context.getTypeDeclType(MD->getParent()).getTypePtr()); // Under the MS ABI, lock down the inheritance model now. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(OpLoc, MPTy); return MPTy; } else if (lval != Expr::LV_Valid && lval != Expr::LV_IncompleteVoidType) { // C99 6.5.3.2p1 // The operand must be either an l-value or a function designator if (!op->getType()->isFunctionType()) { // Use a special diagnostic for loads from property references. if (isa(op)) { AddressOfError = AO_Property_Expansion; } else { Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof) << op->getType() << op->getSourceRange(); return QualType(); } } } else if (op->getObjectKind() == OK_BitField) { // C99 6.5.3.2p1 // The operand cannot be a bit-field AddressOfError = AO_Bit_Field; } else if (op->getObjectKind() == OK_VectorComponent) { // The operand cannot be an element of a vector AddressOfError = AO_Vector_Element; } else if (op->getObjectKind() == OK_MatrixComponent) { // The operand cannot be an element of a matrix. AddressOfError = AO_Matrix_Element; } else if (dcl) { // C99 6.5.3.2p1 // We have an lvalue with a decl. Make sure the decl is not declared // with the register storage-class specifier. if (const VarDecl *vd = dyn_cast(dcl)) { // in C++ it is not error to take address of a register // variable (c++03 7.1.1P3) if (vd->getStorageClass() == SC_Register && !getLangOpts().CPlusPlus) { AddressOfError = AO_Register_Variable; } } else if (isa(dcl)) { AddressOfError = AO_Property_Expansion; } else if (isa(dcl)) { return Context.OverloadTy; } else if (isa(dcl) || isa(dcl)) { // Okay: we can take the address of a field. // Could be a pointer to member, though, if there is an explicit // scope qualifier for the class. if (isa(op) && cast(op)->getQualifier()) { DeclContext *Ctx = dcl->getDeclContext(); if (Ctx && Ctx->isRecord()) { if (dcl->getType()->isReferenceType()) { Diag(OpLoc, diag::err_cannot_form_pointer_to_member_of_reference_type) << dcl->getDeclName() << dcl->getType(); return QualType(); } while (cast(Ctx)->isAnonymousStructOrUnion()) Ctx = Ctx->getParent(); QualType MPTy = Context.getMemberPointerType( op->getType(), Context.getTypeDeclType(cast(Ctx)).getTypePtr()); // Under the MS ABI, lock down the inheritance model now. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(OpLoc, MPTy); return MPTy; } } } else if (!isa(dcl)) llvm_unreachable("Unknown/unexpected decl type"); } if (AddressOfError != AO_No_Error) { diagnoseAddressOfInvalidType(*this, OpLoc, op, AddressOfError); return QualType(); } if (lval == Expr::LV_IncompleteVoidType) { // Taking the address of a void variable is technically illegal, but we // allow it in cases which are otherwise valid. 
// Example: "extern void x; void* y = &x;". Diag(OpLoc, diag::ext_typecheck_addrof_void) << op->getSourceRange(); } // If the operand has type "type", the result has type "pointer to type". if (op->getType()->isObjCObjectType()) return Context.getObjCObjectPointerType(op->getType()); CheckAddressOfPackedMember(op); return Context.getPointerType(op->getType()); } static void RecordModifiableNonNullParam(Sema &S, const Expr *Exp) { const DeclRefExpr *DRE = dyn_cast(Exp); if (!DRE) return; const Decl *D = DRE->getDecl(); if (!D) return; const ParmVarDecl *Param = dyn_cast(D); if (!Param) return; if (const FunctionDecl* FD = dyn_cast(Param->getDeclContext())) if (!FD->hasAttr() && !Param->hasAttr()) return; if (FunctionScopeInfo *FD = S.getCurFunction()) FD->ModifiedNonNullParams.insert(Param); } /// CheckIndirectionOperand - Type check unary indirection (prefix '*'). static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK, SourceLocation OpLoc) { if (Op->isTypeDependent()) return S.Context.DependentTy; ExprResult ConvResult = S.UsualUnaryConversions(Op); if (ConvResult.isInvalid()) return QualType(); Op = ConvResult.get(); QualType OpTy = Op->getType(); QualType Result; if (isa(Op)) { QualType OpOrigType = Op->IgnoreParenCasts()->getType(); S.CheckCompatibleReinterpretCast(OpOrigType, OpTy, /*IsDereference*/true, Op->getSourceRange()); } if (const PointerType *PT = OpTy->getAs()) { Result = PT->getPointeeType(); } else if (const ObjCObjectPointerType *OPT = OpTy->getAs()) Result = OPT->getPointeeType(); else { ExprResult PR = S.CheckPlaceholderExpr(Op); if (PR.isInvalid()) return QualType(); if (PR.get() != Op) return CheckIndirectionOperand(S, PR.get(), VK, OpLoc); } if (Result.isNull()) { S.Diag(OpLoc, diag::err_typecheck_indirection_requires_pointer) << OpTy << Op->getSourceRange(); return QualType(); } // Note that per both C89 and C99, indirection is always legal, even if Result // is an incomplete type or void. It would be possible to warn about // dereferencing a void pointer, but it's completely well-defined, and such a // warning is unlikely to catch any mistakes. In C++, indirection is not valid // for pointers to 'void' but is fine for any other pointer type: // // C++ [expr.unary.op]p1: // [...] the expression to which [the unary * operator] is applied shall // be a pointer to an object type, or a pointer to a function type if (S.getLangOpts().CPlusPlus && Result->isVoidType()) S.Diag(OpLoc, diag::ext_typecheck_indirection_through_void_pointer) << OpTy << Op->getSourceRange(); // Dereferences are usually l-values... VK = VK_LValue; // ...except that certain expressions are never l-values in C. 
if (!S.getLangOpts().CPlusPlus && Result.isCForbiddenLValueType()) VK = VK_PRValue; return Result; } BinaryOperatorKind Sema::ConvertTokenKindToBinaryOpcode(tok::TokenKind Kind) { BinaryOperatorKind Opc; switch (Kind) { default: llvm_unreachable("Unknown binop!"); case tok::periodstar: Opc = BO_PtrMemD; break; case tok::arrowstar: Opc = BO_PtrMemI; break; case tok::star: Opc = BO_Mul; break; case tok::slash: Opc = BO_Div; break; case tok::percent: Opc = BO_Rem; break; case tok::plus: Opc = BO_Add; break; case tok::minus: Opc = BO_Sub; break; case tok::lessless: Opc = BO_Shl; break; case tok::greatergreater: Opc = BO_Shr; break; case tok::lessequal: Opc = BO_LE; break; case tok::less: Opc = BO_LT; break; case tok::greaterequal: Opc = BO_GE; break; case tok::greater: Opc = BO_GT; break; case tok::exclaimequal: Opc = BO_NE; break; case tok::equalequal: Opc = BO_EQ; break; case tok::spaceship: Opc = BO_Cmp; break; case tok::amp: Opc = BO_And; break; case tok::caret: Opc = BO_Xor; break; case tok::pipe: Opc = BO_Or; break; case tok::ampamp: Opc = BO_LAnd; break; case tok::pipepipe: Opc = BO_LOr; break; case tok::equal: Opc = BO_Assign; break; case tok::starequal: Opc = BO_MulAssign; break; case tok::slashequal: Opc = BO_DivAssign; break; case tok::percentequal: Opc = BO_RemAssign; break; case tok::plusequal: Opc = BO_AddAssign; break; case tok::minusequal: Opc = BO_SubAssign; break; case tok::lesslessequal: Opc = BO_ShlAssign; break; case tok::greatergreaterequal: Opc = BO_ShrAssign; break; case tok::ampequal: Opc = BO_AndAssign; break; case tok::caretequal: Opc = BO_XorAssign; break; case tok::pipeequal: Opc = BO_OrAssign; break; case tok::comma: Opc = BO_Comma; break; } return Opc; } static inline UnaryOperatorKind ConvertTokenKindToUnaryOpcode( tok::TokenKind Kind) { UnaryOperatorKind Opc; switch (Kind) { default: llvm_unreachable("Unknown unary op!"); case tok::plusplus: Opc = UO_PreInc; break; case tok::minusminus: Opc = UO_PreDec; break; case tok::amp: Opc = UO_AddrOf; break; case tok::star: Opc = UO_Deref; break; case tok::plus: Opc = UO_Plus; break; case tok::minus: Opc = UO_Minus; break; case tok::tilde: Opc = UO_Not; break; case tok::exclaim: Opc = UO_LNot; break; case tok::kw___real: Opc = UO_Real; break; case tok::kw___imag: Opc = UO_Imag; break; case tok::kw___extension__: Opc = UO_Extension; break; } return Opc; } const FieldDecl * Sema::getSelfAssignmentClassMemberCandidate(const ValueDecl *SelfAssigned) { // Explore the case for adding 'this->' to the LHS of a self assignment, very // common for setters. // struct A { // int X; // -void setX(int X) { X = X; } // +void setX(int X) { this->X = X; } // }; // Only consider parameters for self assignment fixes. if (!isa(SelfAssigned)) return nullptr; const auto *Method = dyn_cast_or_null(getCurFunctionDecl(true)); if (!Method) return nullptr; const CXXRecordDecl *Parent = Method->getParent(); // In theory this is fixable if the lambda explicitly captures this, but // that's added complexity that's rarely going to be used. if (Parent->isLambda()) return nullptr; // FIXME: Use an actual Lookup operation instead of just traversing fields // in order to get base class fields. auto Field = llvm::find_if(Parent->fields(), [Name(SelfAssigned->getDeclName())](const FieldDecl *F) { return F->getDeclName() == Name; }); return (Field != Parent->field_end()) ? *Field : nullptr; } /// DiagnoseSelfAssignment - Emits a warning if a value is assigned to itself. /// This warning suppressed in the event of macro expansions. 
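/// A minimal illustration (not from the original source):
///   int a = 0;
///   a = a;   // warns about assigning the value of 'a' to itself
/// The fix-it path below additionally suggests 'this->X = X' for the common
/// shadowed-parameter setter pattern.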
static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr, SourceLocation OpLoc, bool IsBuiltin) { if (S.inTemplateInstantiation()) return; if (S.isUnevaluatedContext()) return; if (OpLoc.isInvalid() || OpLoc.isMacroID()) return; LHSExpr = LHSExpr->IgnoreParenImpCasts(); RHSExpr = RHSExpr->IgnoreParenImpCasts(); const DeclRefExpr *LHSDeclRef = dyn_cast(LHSExpr); const DeclRefExpr *RHSDeclRef = dyn_cast(RHSExpr); if (!LHSDeclRef || !RHSDeclRef || LHSDeclRef->getLocation().isMacroID() || RHSDeclRef->getLocation().isMacroID()) return; const ValueDecl *LHSDecl = cast(LHSDeclRef->getDecl()->getCanonicalDecl()); const ValueDecl *RHSDecl = cast(RHSDeclRef->getDecl()->getCanonicalDecl()); if (LHSDecl != RHSDecl) return; if (LHSDecl->getType().isVolatileQualified()) return; if (const ReferenceType *RefTy = LHSDecl->getType()->getAs()) if (RefTy->getPointeeType().isVolatileQualified()) return; auto Diag = S.Diag(OpLoc, IsBuiltin ? diag::warn_self_assignment_builtin : diag::warn_self_assignment_overloaded) << LHSDeclRef->getType() << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); if (const FieldDecl *SelfAssignField = S.getSelfAssignmentClassMemberCandidate(RHSDecl)) Diag << 1 << SelfAssignField << FixItHint::CreateInsertion(LHSDeclRef->getBeginLoc(), "this->"); else Diag << 0; } /// Check if a bitwise-& is performed on an Objective-C pointer. This /// is usually indicative of introspection within the Objective-C pointer. static void checkObjCPointerIntrospection(Sema &S, ExprResult &L, ExprResult &R, SourceLocation OpLoc) { if (!S.getLangOpts().ObjC) return; const Expr *ObjCPointerExpr = nullptr, *OtherExpr = nullptr; const Expr *LHS = L.get(); const Expr *RHS = R.get(); if (LHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) { ObjCPointerExpr = LHS; OtherExpr = RHS; } else if (RHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) { ObjCPointerExpr = RHS; OtherExpr = LHS; } // This warning is deliberately made very specific to reduce false // positives with logic that uses '&' for hashing. This logic mainly // looks for code trying to introspect into tagged pointers, which // code should generally never do. if (ObjCPointerExpr && isa(OtherExpr->IgnoreParenCasts())) { unsigned Diag = diag::warn_objc_pointer_masking; // Determine if we are introspecting the result of performSelectorXXX. const Expr *Ex = ObjCPointerExpr->IgnoreParenCasts(); // Special case messages to -performSelector and friends, which // can return non-pointer values boxed in a pointer value. // Some clients may wish to silence warnings in this subcase. if (const ObjCMessageExpr *ME = dyn_cast(Ex)) { Selector S = ME->getSelector(); StringRef SelArg0 = S.getNameForSlot(0); if (SelArg0.startswith("performSelector")) Diag = diag::warn_objc_pointer_masking_performSelector; } S.Diag(OpLoc, Diag) << ObjCPointerExpr->getSourceRange(); } } static NamedDecl *getDeclFromExpr(Expr *E) { if (!E) return nullptr; if (auto *DRE = dyn_cast(E)) return DRE->getDecl(); if (auto *ME = dyn_cast(E)) return ME->getMemberDecl(); if (auto *IRE = dyn_cast(E)) return IRE->getDecl(); return nullptr; } // This helper function promotes a binary operator's operands (which are of a // half vector type) to a vector of floats and then truncates the result to // a vector of either half or short. 
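// An illustrative expansion (not from the original source), assuming a target
// where __fp16 is storage-only and given
//   typedef __fp16 half4 __attribute__((ext_vector_type(4)));
//   half4 a, b;
//   a + b    // computed roughly as (half4)((float4)a + (float4)b)
//   a == b   // comparison result is a vector of short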
static ExprResult convertHalfVecBinOp(Sema &S, ExprResult LHS, ExprResult RHS, BinaryOperatorKind Opc, QualType ResultTy, ExprValueKind VK, ExprObjectKind OK, bool IsCompAssign, SourceLocation OpLoc, FPOptionsOverride FPFeatures) { auto &Context = S.getASTContext(); assert((isVector(ResultTy, Context.HalfTy) || isVector(ResultTy, Context.ShortTy)) && "Result must be a vector of half or short"); assert(isVector(LHS.get()->getType(), Context.HalfTy) && isVector(RHS.get()->getType(), Context.HalfTy) && "both operands expected to be a half vector"); RHS = convertVector(RHS.get(), Context.FloatTy, S); QualType BinOpResTy = RHS.get()->getType(); // If Opc is a comparison, ResultType is a vector of shorts. In that case, // change BinOpResTy to a vector of ints. if (isVector(ResultTy, Context.ShortTy)) BinOpResTy = S.GetSignedVectorType(BinOpResTy); if (IsCompAssign) return CompoundAssignOperator::Create(Context, LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, OpLoc, FPFeatures, BinOpResTy, BinOpResTy); LHS = convertVector(LHS.get(), Context.FloatTy, S); auto *BO = BinaryOperator::Create(Context, LHS.get(), RHS.get(), Opc, BinOpResTy, VK, OK, OpLoc, FPFeatures); return convertVector(BO, ResultTy->castAs()->getElementType(), S); } static std::pair CorrectDelayedTyposInBinOp(Sema &S, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { ExprResult LHS = LHSExpr, RHS = RHSExpr; if (!S.Context.isDependenceAllowed()) { // C cannot handle TypoExpr nodes on either side of a binop because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. LHS = S.CorrectDelayedTyposInExpr(LHS); RHS = S.CorrectDelayedTyposInExpr( RHS, /*InitDecl=*/nullptr, /*RecoverUncorrectedTypos=*/false, [Opc, LHS](Expr *E) { if (Opc != BO_Assign) return ExprResult(E); // Avoid correcting the RHS to the same Expr as the LHS. Decl *D = getDeclFromExpr(E); return (D && D == getDeclFromExpr(LHS.get())) ? ExprError() : E; }); } return std::make_pair(LHS, RHS); } /// Returns true if conversion between vectors of halfs and vectors of floats /// is needed. static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx, Expr *E0, Expr *E1 = nullptr) { if (!OpRequiresConversion || Ctx.getLangOpts().NativeHalfType || Ctx.getTargetInfo().useFP16ConversionIntrinsics()) return false; auto HasVectorOfHalfType = [&Ctx](Expr *E) { QualType Ty = E->IgnoreImplicit()->getType(); // Don't promote half precision neon vectors like float16x4_t in arm_neon.h // to vectors of floats. Although the element type of the vectors is __fp16, // the vectors shouldn't be treated as storage-only types. See the // discussion here: https://reviews.llvm.org/rG825235c140e7 if (const VectorType *VT = Ty->getAs()) { if (VT->getVectorKind() == VectorType::NeonVector) return false; return VT->getElementType().getCanonicalType() == Ctx.HalfTy; } return false; }; return HasVectorOfHalfType(E0) && (!E1 || HasVectorOfHalfType(E1)); } /// CreateBuiltinBinOp - Creates a new built-in binary operation with /// operator @p Opc at location @c TokLoc. This routine only supports /// built-in operations; ActOnBinOp handles overloaded operators. ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { if (getLangOpts().CPlusPlus11 && isa(RHSExpr)) { // The syntax only allows initializer lists on the RHS of assignment, // so we don't need to worry about accepting invalid code for // non-assignment operators. 
// C++11 5.17p9: // The meaning of x = {v} [...] is that of x = T(v) [...]. The meaning // of x = {} is x = T(). InitializationKind Kind = InitializationKind::CreateDirectList( RHSExpr->getBeginLoc(), RHSExpr->getBeginLoc(), RHSExpr->getEndLoc()); InitializedEntity Entity = InitializedEntity::InitializeTemporary(LHSExpr->getType()); InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr); ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr); if (Init.isInvalid()) return Init; RHSExpr = Init.get(); } ExprResult LHS = LHSExpr, RHS = RHSExpr; QualType ResultTy; // Result type of the binary operator. // The following two variables are used for compound assignment operators QualType CompLHSTy; // Type of LHS after promotions for computation QualType CompResultTy; // Type of computation result ExprValueKind VK = VK_PRValue; ExprObjectKind OK = OK_Ordinary; bool ConvertHalfVec = false; std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr); if (!LHS.isUsable() || !RHS.isUsable()) return ExprError(); if (getLangOpts().OpenCL) { QualType LHSTy = LHSExpr->getType(); QualType RHSTy = RHSExpr->getType(); // OpenCLC v2.0 s6.13.11.1 allows atomic variables to be initialized by // the ATOMIC_VAR_INIT macro. if (LHSTy->isAtomicType() || RHSTy->isAtomicType()) { SourceRange SR(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc()); if (BO_Assign == Opc) Diag(OpLoc, diag::err_opencl_atomic_init) << 0 << SR; else ResultTy = InvalidOperands(OpLoc, LHS, RHS); return ExprError(); } // OpenCL special types - image, sampler, pipe, and blocks are to be used // only with a builtin functions and therefore should be disallowed here. if (LHSTy->isImageType() || RHSTy->isImageType() || LHSTy->isSamplerT() || RHSTy->isSamplerT() || LHSTy->isPipeType() || RHSTy->isPipeType() || LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) { ResultTy = InvalidOperands(OpLoc, LHS, RHS); return ExprError(); } } checkTypeSupport(LHSExpr->getType(), OpLoc, /*ValueDecl*/ nullptr); checkTypeSupport(RHSExpr->getType(), OpLoc, /*ValueDecl*/ nullptr); switch (Opc) { case BO_Assign: ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType(), Opc); if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() != OK_ObjCProperty) { VK = LHS.get()->getValueKind(); OK = LHS.get()->getObjectKind(); } if (!ResultTy.isNull()) { DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc, true); DiagnoseSelfMove(LHS.get(), RHS.get(), OpLoc); // Avoid copying a block to the heap if the block is assigned to a local // auto variable that is declared in the same scope as the block. This // optimization is unsafe if the local variable is declared in an outer // scope. For example: // // BlockTy b; // { // b = ^{...}; // } // // It is unsafe to invoke the block here if it wasn't copied to the // // heap. 
// b(); if (auto *BE = dyn_cast(RHS.get()->IgnoreParens())) if (auto *DRE = dyn_cast(LHS.get()->IgnoreParens())) if (auto *VD = dyn_cast(DRE->getDecl())) if (VD->hasLocalStorage() && getCurScope()->isDeclScope(VD)) BE->getBlockDecl()->setCanAvoidCopyToHeap(); if (LHS.get()->getType().hasNonTrivialToPrimitiveCopyCUnion()) checkNonTrivialCUnion(LHS.get()->getType(), LHS.get()->getExprLoc(), NTCUC_Assignment, NTCUK_Copy); } RecordModifiableNonNullParam(*this, LHS.get()); break; case BO_PtrMemD: case BO_PtrMemI: ResultTy = CheckPointerToMemberOperands(LHS, RHS, VK, OpLoc, Opc == BO_PtrMemI); break; case BO_Mul: case BO_Div: ConvertHalfVec = true; ResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, false, Opc == BO_Div); break; case BO_Rem: ResultTy = CheckRemainderOperands(LHS, RHS, OpLoc); break; case BO_Add: ConvertHalfVec = true; ResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc); break; case BO_Sub: ConvertHalfVec = true; ResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc); break; case BO_Shl: case BO_Shr: ResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc); break; case BO_LE: case BO_LT: case BO_GE: case BO_GT: ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc); break; case BO_EQ: case BO_NE: ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc); break; case BO_Cmp: ConvertHalfVec = true; ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc); assert(ResultTy.isNull() || ResultTy->getAsCXXRecordDecl()); break; case BO_And: checkObjCPointerIntrospection(*this, LHS, RHS, OpLoc); LLVM_FALLTHROUGH; case BO_Xor: case BO_Or: ResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc); break; case BO_LAnd: case BO_LOr: ConvertHalfVec = true; ResultTy = CheckLogicalOperands(LHS, RHS, OpLoc, Opc); break; case BO_MulAssign: case BO_DivAssign: ConvertHalfVec = true; CompResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, true, Opc == BO_DivAssign); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc); break; case BO_RemAssign: CompResultTy = CheckRemainderOperands(LHS, RHS, OpLoc, true); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc); break; case BO_AddAssign: ConvertHalfVec = true; CompResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc, &CompLHSTy); if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc); break; case BO_SubAssign: ConvertHalfVec = true; CompResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc, &CompLHSTy); if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc); break; case BO_ShlAssign: case BO_ShrAssign: CompResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc, true); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc); break; case BO_AndAssign: case BO_OrAssign: // fallthrough DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc, true); LLVM_FALLTHROUGH; case BO_XorAssign: CompResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, 
CompResultTy, Opc); break; case BO_Comma: ResultTy = CheckCommaOperands(*this, LHS, RHS, OpLoc); if (getLangOpts().CPlusPlus && !RHS.isInvalid()) { VK = RHS.get()->getValueKind(); OK = RHS.get()->getObjectKind(); } break; } if (ResultTy.isNull() || LHS.isInvalid() || RHS.isInvalid()) return ExprError(); // Some of the binary operations require promoting operands of half vector to // float vectors and truncating the result back to half vector. For now, we do // this only when HalfArgsAndReturn is set (that is, when the target is arm or // arm64). assert( (Opc == BO_Comma || isVector(RHS.get()->getType(), Context.HalfTy) == isVector(LHS.get()->getType(), Context.HalfTy)) && "both sides are half vectors or neither sides are"); ConvertHalfVec = needsConversionOfHalfVec(ConvertHalfVec, Context, LHS.get(), RHS.get()); // Check for array bounds violations for both sides of the BinaryOperator CheckArrayAccess(LHS.get()); CheckArrayAccess(RHS.get()); if (const ObjCIsaExpr *OISA = dyn_cast(LHS.get()->IgnoreParenCasts())) { NamedDecl *ObjectSetClass = LookupSingleName(TUScope, &Context.Idents.get("object_setClass"), SourceLocation(), LookupOrdinaryName); if (ObjectSetClass && isa(LHS.get())) { SourceLocation RHSLocEnd = getLocForEndOfToken(RHS.get()->getEndLoc()); Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign) << FixItHint::CreateInsertion(LHS.get()->getBeginLoc(), "object_setClass(") << FixItHint::CreateReplacement(SourceRange(OISA->getOpLoc(), OpLoc), ",") << FixItHint::CreateInsertion(RHSLocEnd, ")"); } else Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(LHS.get()->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, OpLoc, RHS.get()); // Opc is not a compound assignment if CompResultTy is null. if (CompResultTy.isNull()) { if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, false, OpLoc, CurFPFeatureOverrides()); return BinaryOperator::Create(Context, LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, OpLoc, CurFPFeatureOverrides()); } // Handle compound assignments. if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() != OK_ObjCProperty) { VK = VK_LValue; OK = LHS.get()->getObjectKind(); } // The LHS is not converted to the result type for fixed-point compound // assignment as the common type is computed on demand. Reset the CompLHSTy // to the LHS type we would have gotten after unary conversions. if (CompResultTy->isFixedPointType()) CompLHSTy = UsualUnaryConversions(LHS.get()).get()->getType(); if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, true, OpLoc, CurFPFeatureOverrides()); return CompoundAssignOperator::Create( Context, LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, OpLoc, CurFPFeatureOverrides(), CompLHSTy, CompResultTy); } /// DiagnoseBitwisePrecedence - Emit a warning when bitwise and comparison /// operators are mixed in a way that suggests that the programmer forgot that /// comparison operators have higher precedence. The most typical example of /// such code is "flags & 0x0020 != 0", which is equivalent to "flags & 1". static void DiagnoseBitwisePrecedence(Sema &Self, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperator *LHSBO = dyn_cast(LHSExpr); BinaryOperator *RHSBO = dyn_cast(RHSExpr); // Check that one of the sides is a comparison operator and the other isn't. 
bool isLeftComp = LHSBO && LHSBO->isComparisonOp(); bool isRightComp = RHSBO && RHSBO->isComparisonOp(); if (isLeftComp == isRightComp) return; // Bitwise operations are sometimes used as eager logical ops. // Don't diagnose this. bool isLeftBitwise = LHSBO && LHSBO->isBitwiseOp(); bool isRightBitwise = RHSBO && RHSBO->isBitwiseOp(); if (isLeftBitwise || isRightBitwise) return; SourceRange DiagRange = isLeftComp ? SourceRange(LHSExpr->getBeginLoc(), OpLoc) : SourceRange(OpLoc, RHSExpr->getEndLoc()); StringRef OpStr = isLeftComp ? LHSBO->getOpcodeStr() : RHSBO->getOpcodeStr(); SourceRange ParensRange = isLeftComp ? SourceRange(LHSBO->getRHS()->getBeginLoc(), RHSExpr->getEndLoc()) : SourceRange(LHSExpr->getBeginLoc(), RHSBO->getLHS()->getEndLoc()); Self.Diag(OpLoc, diag::warn_precedence_bitwise_rel) << DiagRange << BinaryOperator::getOpcodeStr(Opc) << OpStr; SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_silence) << OpStr, (isLeftComp ? LHSExpr : RHSExpr)->getSourceRange()); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_bitwise_first) << BinaryOperator::getOpcodeStr(Opc), ParensRange); } /// It accepts a '&&' expr that is inside a '||' one. /// Emit a diagnostic together with a fixit hint that wraps the '&&' expression /// in parentheses. static void EmitDiagnosticForLogicalAndInLogicalOr(Sema &Self, SourceLocation OpLoc, BinaryOperator *Bop) { assert(Bop->getOpcode() == BO_LAnd); Self.Diag(Bop->getOperatorLoc(), diag::warn_logical_and_in_logical_or) << Bop->getSourceRange() << OpLoc; SuggestParentheses(Self, Bop->getOperatorLoc(), Self.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(), Bop->getSourceRange()); } /// Returns true if the given expression can be evaluated as a constant /// 'true'. static bool EvaluatesAsTrue(Sema &S, Expr *E) { bool Res; return !E->isValueDependent() && E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && Res; } /// Returns true if the given expression can be evaluated as a constant /// 'false'. static bool EvaluatesAsFalse(Sema &S, Expr *E) { bool Res; return !E->isValueDependent() && E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && !Res; } /// Look for '&&' in the left hand of a '||' expr. static void DiagnoseLogicalAndInLogicalOrLHS(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { if (BinaryOperator *Bop = dyn_cast(LHSExpr)) { if (Bop->getOpcode() == BO_LAnd) { // If it's "a && b || 0" don't warn since the precedence doesn't matter. if (EvaluatesAsFalse(S, RHSExpr)) return; // If it's "1 && a || b" don't warn since the precedence doesn't matter. if (!EvaluatesAsTrue(S, Bop->getLHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop); } else if (Bop->getOpcode() == BO_LOr) { if (BinaryOperator *RBop = dyn_cast(Bop->getRHS())) { // If it's "a || b && 1 || c" we didn't warn earlier for // "a || b && 1", but warn now. if (RBop->getOpcode() == BO_LAnd && EvaluatesAsTrue(S, RBop->getRHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, RBop); } } } } /// Look for '&&' in the right hand of a '||' expr. static void DiagnoseLogicalAndInLogicalOrRHS(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { if (BinaryOperator *Bop = dyn_cast(RHSExpr)) { if (Bop->getOpcode() == BO_LAnd) { // If it's "0 || a && b" don't warn since the precedence doesn't matter. if (EvaluatesAsFalse(S, LHSExpr)) return; // If it's "a || b && 1" don't warn since the precedence doesn't matter. 
if (!EvaluatesAsTrue(S, Bop->getRHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop); } } } /// Look for bitwise op in the left or right hand of a bitwise op with /// lower precedence and emit a diagnostic together with a fixit hint that wraps /// the '&' expression in parentheses. static void DiagnoseBitwiseOpInBitwiseOp(Sema &S, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *SubExpr) { if (BinaryOperator *Bop = dyn_cast(SubExpr)) { if (Bop->isBitwiseOp() && Bop->getOpcode() < Opc) { S.Diag(Bop->getOperatorLoc(), diag::warn_bitwise_op_in_bitwise_op) << Bop->getOpcodeStr() << BinaryOperator::getOpcodeStr(Opc) << Bop->getSourceRange() << OpLoc; SuggestParentheses(S, Bop->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(), Bop->getSourceRange()); } } } static void DiagnoseAdditionInShift(Sema &S, SourceLocation OpLoc, Expr *SubExpr, StringRef Shift) { if (BinaryOperator *Bop = dyn_cast(SubExpr)) { if (Bop->getOpcode() == BO_Add || Bop->getOpcode() == BO_Sub) { StringRef Op = Bop->getOpcodeStr(); S.Diag(Bop->getOperatorLoc(), diag::warn_addition_in_bitshift) << Bop->getSourceRange() << OpLoc << Shift << Op; SuggestParentheses(S, Bop->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << Op, Bop->getSourceRange()); } } } static void DiagnoseShiftCompare(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { CXXOperatorCallExpr *OCE = dyn_cast(LHSExpr); if (!OCE) return; FunctionDecl *FD = OCE->getDirectCallee(); if (!FD || !FD->isOverloadedOperator()) return; OverloadedOperatorKind Kind = FD->getOverloadedOperator(); if (Kind != OO_LessLess && Kind != OO_GreaterGreater) return; S.Diag(OpLoc, diag::warn_overloaded_shift_in_comparison) << LHSExpr->getSourceRange() << RHSExpr->getSourceRange() << (Kind == OO_LessLess); SuggestParentheses(S, OCE->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << (Kind == OO_LessLess ? "<<" : ">>"), OCE->getSourceRange()); SuggestParentheses( S, OpLoc, S.PDiag(diag::note_evaluate_comparison_first), SourceRange(OCE->getArg(1)->getBeginLoc(), RHSExpr->getEndLoc())); } /// DiagnoseBinOpPrecedence - Emit warnings for expressions with tricky /// precedence. static void DiagnoseBinOpPrecedence(Sema &Self, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr){ // Diagnose "arg1 'bitwise' arg2 'eq' arg3". if (BinaryOperator::isBitwiseOp(Opc)) DiagnoseBitwisePrecedence(Self, Opc, OpLoc, LHSExpr, RHSExpr); // Diagnose "arg1 & arg2 | arg3" if ((Opc == BO_Or || Opc == BO_Xor) && !OpLoc.isMacroID()/* Don't warn in macros. */) { DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, LHSExpr); DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, RHSExpr); } // Warn about arg1 || arg2 && arg3, as GCC 4.3+ does. // We don't warn for 'assert(a || b && "bad")' since this is safe. if (Opc == BO_LOr && !OpLoc.isMacroID()/* Don't warn in macros. */) { DiagnoseLogicalAndInLogicalOrLHS(Self, OpLoc, LHSExpr, RHSExpr); DiagnoseLogicalAndInLogicalOrRHS(Self, OpLoc, LHSExpr, RHSExpr); } if ((Opc == BO_Shl && LHSExpr->getType()->isIntegralType(Self.getASTContext())) || Opc == BO_Shr) { StringRef Shift = BinaryOperator::getOpcodeStr(Opc); DiagnoseAdditionInShift(Self, OpLoc, LHSExpr, Shift); DiagnoseAdditionInShift(Self, OpLoc, RHSExpr, Shift); } // Warn on overloaded shift operators and comparisons, such as: // cout << 5 == 4; if (BinaryOperator::isComparisonOp(Opc)) DiagnoseShiftCompare(Self, OpLoc, LHSExpr, RHSExpr); } // Binary Operators. 'Tok' is the token for the operator. 
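// A few illustrative inputs (echoing the examples in the comments nearby)
// that trigger the precedence diagnostics dispatched from ActOnBinOp via
// DiagnoseBinOpPrecedence:
//   flags & 0x0020 != 0   // '!=' binds tighter than '&' (-Wparentheses)
//   a || b && c           // '&&' within '||' (-Wlogical-op-parentheses)
//   cout << 5 == 4        // overloaded shift compared with '=='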
ExprResult Sema::ActOnBinOp(Scope *S, SourceLocation TokLoc, tok::TokenKind Kind, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperatorKind Opc = ConvertTokenKindToBinaryOpcode(Kind); assert(LHSExpr && "ActOnBinOp(): missing left expression"); assert(RHSExpr && "ActOnBinOp(): missing right expression"); // Emit warnings for tricky precedence issues, e.g. "bitfield & 0x4 == 0" DiagnoseBinOpPrecedence(*this, Opc, TokLoc, LHSExpr, RHSExpr); return BuildBinOp(S, TokLoc, Opc, LHSExpr, RHSExpr); } void Sema::LookupBinOp(Scope *S, SourceLocation OpLoc, BinaryOperatorKind Opc, UnresolvedSetImpl &Functions) { OverloadedOperatorKind OverOp = BinaryOperator::getOverloadedOperator(Opc); if (OverOp != OO_None && OverOp != OO_Equal) LookupOverloadedOperatorName(OverOp, S, Functions); // In C++20 onwards, we may have a second operator to look up. if (getLangOpts().CPlusPlus20) { if (OverloadedOperatorKind ExtraOp = getRewrittenOverloadedOperator(OverOp)) LookupOverloadedOperatorName(ExtraOp, S, Functions); } } /// Build an overloaded binary operator expression in the given scope. static ExprResult BuildOverloadedBinOp(Sema &S, Scope *Sc, SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHS, Expr *RHS) { switch (Opc) { case BO_Assign: case BO_DivAssign: case BO_RemAssign: case BO_SubAssign: case BO_AndAssign: case BO_OrAssign: case BO_XorAssign: DiagnoseSelfAssignment(S, LHS, RHS, OpLoc, false); CheckIdentityFieldAssignment(LHS, RHS, OpLoc, S); break; default: break; } // Find all of the overloaded operators visible from this point. UnresolvedSet<16> Functions; S.LookupBinOp(Sc, OpLoc, Opc, Functions); // Build the (potentially-overloaded, potentially-dependent) // binary operation. return S.CreateOverloadedBinOp(OpLoc, Opc, Functions, LHS, RHS); } ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { ExprResult LHS, RHS; std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr); if (!LHS.isUsable() || !RHS.isUsable()) return ExprError(); LHSExpr = LHS.get(); RHSExpr = RHS.get(); // We want to end up calling one of checkPseudoObjectAssignment // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if // both expressions are overloadable or either is type-dependent), // or CreateBuiltinBinOp (in any other case). We also want to get // any placeholder types out of the way. // Handle pseudo-objects in the LHS. if (const BuiltinType *pty = LHSExpr->getType()->getAsPlaceholderType()) { // Assignments with a pseudo-object l-value need special analysis. if (pty->getKind() == BuiltinType::PseudoObject && BinaryOperator::isAssignmentOp(Opc)) return checkPseudoObjectAssignment(S, OpLoc, Opc, LHSExpr, RHSExpr); // Don't resolve overloads if the other type is overloadable. if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload) { // We can't actually test that if we still have a placeholder, // though. Fortunately, none of the exceptions we see in that // code below are valid when the LHS is an overload set. Note // that an overload set can be dependently-typed, but it never // instantiates to having an overloadable type. 
ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr); if (resolvedRHS.isInvalid()) return ExprError(); RHSExpr = resolvedRHS.get(); if (RHSExpr->isTypeDependent() || RHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } // If we're instantiating "a.x < b" or "A::x < b" and 'x' names a function // template, diagnose the missing 'template' keyword instead of diagnosing // an invalid use of a bound member function. // // Note that "A::x < b" might be valid if 'b' has an overloadable type due // to C++1z [over.over]/1.4, but we already checked for that case above. if (Opc == BO_LT && inTemplateInstantiation() && (pty->getKind() == BuiltinType::BoundMember || pty->getKind() == BuiltinType::Overload)) { auto *OE = dyn_cast(LHSExpr); if (OE && !OE->hasTemplateKeyword() && !OE->hasExplicitTemplateArgs() && llvm::any_of(OE->decls(), [](NamedDecl *ND) { return isa(ND); })) { Diag(OE->getQualifier() ? OE->getQualifierLoc().getBeginLoc() : OE->getNameLoc(), diag::err_template_kw_missing) << OE->getName().getAsString() << ""; return ExprError(); } } ExprResult LHS = CheckPlaceholderExpr(LHSExpr); if (LHS.isInvalid()) return ExprError(); LHSExpr = LHS.get(); } // Handle pseudo-objects in the RHS. if (const BuiltinType *pty = RHSExpr->getType()->getAsPlaceholderType()) { // An overload in the RHS can potentially be resolved by the type // being assigned to. if (Opc == BO_Assign && pty->getKind() == BuiltinType::Overload) { if (getLangOpts().CPlusPlus && (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() || LHSExpr->getType()->isOverloadableType())) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } // Don't resolve overloads if the other type is overloadable. if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload && LHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr); if (!resolvedRHS.isUsable()) return ExprError(); RHSExpr = resolvedRHS.get(); } if (getLangOpts().CPlusPlus) { // If either expression is type-dependent, always build an // overloaded op. if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); // Otherwise, build an overloaded op if either expression has an // overloadable type. if (LHSExpr->getType()->isOverloadableType() || RHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } if (getLangOpts().RecoveryAST && (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent())) { assert(!getLangOpts().CPlusPlus); assert((LHSExpr->containsErrors() || RHSExpr->containsErrors()) && "Should only occur in error-recovery path."); if (BinaryOperator::isCompoundAssignmentOp(Opc)) // C [6.15.16] p3: // An assignment expression has the value of the left operand after the // assignment, but is not an lvalue. return CompoundAssignOperator::Create( Context, LHSExpr, RHSExpr, Opc, LHSExpr->getType().getUnqualifiedType(), VK_PRValue, OK_Ordinary, OpLoc, CurFPFeatureOverrides()); QualType ResultType; switch (Opc) { case BO_Assign: ResultType = LHSExpr->getType().getUnqualifiedType(); break; case BO_LT: case BO_GT: case BO_LE: case BO_GE: case BO_EQ: case BO_NE: case BO_LAnd: case BO_LOr: // These operators have a fixed result type regardless of operands. 
ResultType = Context.IntTy; break; case BO_Comma: ResultType = RHSExpr->getType(); break; default: ResultType = Context.DependentTy; break; } return BinaryOperator::Create(Context, LHSExpr, RHSExpr, Opc, ResultType, VK_PRValue, OK_Ordinary, OpLoc, CurFPFeatureOverrides()); } // Build a built-in binary operation. return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } static bool isOverflowingIntegerType(ASTContext &Ctx, QualType T) { if (T.isNull() || T->isDependentType()) return false; if (!T->isPromotableIntegerType()) return true; return Ctx.getIntWidth(T) >= Ctx.getIntWidth(Ctx.IntTy); } ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc, Expr *InputExpr) { ExprResult Input = InputExpr; ExprValueKind VK = VK_PRValue; ExprObjectKind OK = OK_Ordinary; QualType resultType; bool CanOverflow = false; bool ConvertHalfVec = false; if (getLangOpts().OpenCL) { QualType Ty = InputExpr->getType(); // The only legal unary operation for atomics is '&'. if ((Opc != UO_AddrOf && Ty->isAtomicType()) || // OpenCL special types - image, sampler, pipe, and blocks are to be used // only with a builtin functions and therefore should be disallowed here. (Ty->isImageType() || Ty->isSamplerT() || Ty->isPipeType() || Ty->isBlockPointerType())) { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << InputExpr->getType() << Input.get()->getSourceRange()); } } if (getLangOpts().HLSL) { if (Opc == UO_AddrOf) return ExprError(Diag(OpLoc, diag::err_hlsl_operator_unsupported) << 0); if (Opc == UO_Deref) return ExprError(Diag(OpLoc, diag::err_hlsl_operator_unsupported) << 1); } switch (Opc) { case UO_PreInc: case UO_PreDec: case UO_PostInc: case UO_PostDec: resultType = CheckIncrementDecrementOperand(*this, Input.get(), VK, OK, OpLoc, Opc == UO_PreInc || Opc == UO_PostInc, Opc == UO_PreInc || Opc == UO_PreDec); CanOverflow = isOverflowingIntegerType(Context, resultType); break; case UO_AddrOf: resultType = CheckAddressOfOperand(Input, OpLoc); CheckAddressOfNoDeref(InputExpr); RecordModifiableNonNullParam(*this, InputExpr); break; case UO_Deref: { Input = DefaultFunctionArrayLvalueConversion(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = CheckIndirectionOperand(*this, Input.get(), VK, OpLoc); break; } case UO_Plus: case UO_Minus: CanOverflow = Opc == UO_Minus && isOverflowingIntegerType(Context, Input.get()->getType()); Input = UsualUnaryConversions(Input.get()); if (Input.isInvalid()) return ExprError(); // Unary plus and minus require promoting an operand of half vector to a // float vector and truncating the result back to a half vector. For now, we // do this only when HalfArgsAndReturns is set (that is, when the target is // arm or arm64). ConvertHalfVec = needsConversionOfHalfVec(true, Context, Input.get()); // If the operand is a half vector, promote it to a float vector. if (ConvertHalfVec) Input = convertVector(Input.get(), Context.FloatTy, *this); resultType = Input.get()->getType(); if (resultType->isDependentType()) break; if (resultType->isArithmeticType()) // C99 6.5.3.3p1 break; else if (resultType->isVectorType() && // The z vector extensions don't allow + or - with bool vectors. 
(!Context.getLangOpts().ZVector || resultType->castAs()->getVectorKind() != VectorType::AltiVecBool)) break; else if (resultType->isVLSTBuiltinType()) // SVE vectors allow + and - break; else if (getLangOpts().CPlusPlus && // C++ [expr.unary.op]p6 Opc == UO_Plus && resultType->isPointerType()) break; return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); case UO_Not: // bitwise complement Input = UsualUnaryConversions(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); if (resultType->isDependentType()) break; // C99 6.5.3.3p1. We allow complex int and float as a GCC extension. if (resultType->isComplexType() || resultType->isComplexIntegerType()) // C99 does not support '~' for complex conjugation. Diag(OpLoc, diag::ext_integer_complement_complex) << resultType << Input.get()->getSourceRange(); else if (resultType->hasIntegerRepresentation()) break; else if (resultType->isExtVectorType() && Context.getLangOpts().OpenCL) { // OpenCL v1.1 s6.3.f: The bitwise operator not (~) does not operate // on vector float types. QualType T = resultType->castAs()->getElementType(); if (!T->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } else { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } break; case UO_LNot: // logical negation // Unlike +/-/~, integer promotions aren't done here (C99 6.5.3.3p5). Input = DefaultFunctionArrayLvalueConversion(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); // Though we still have to promote half FP to float... if (resultType->isHalfType() && !Context.getLangOpts().NativeHalfType) { Input = ImpCastExprToType(Input.get(), Context.FloatTy, CK_FloatingCast).get(); resultType = Context.FloatTy; } if (resultType->isDependentType()) break; if (resultType->isScalarType() && !isScopedEnumerationType(resultType)) { // C99 6.5.3.3p1: ok, fallthrough; if (Context.getLangOpts().CPlusPlus) { // C++03 [expr.unary.op]p8, C++0x [expr.unary.op]p9: // operand contextually converted to bool. Input = ImpCastExprToType(Input.get(), Context.BoolTy, ScalarTypeToBooleanCastKind(resultType)); } else if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { // OpenCL v1.1 6.3.h: The logical operator not (!) does not // operate on scalar float types. if (!resultType->isIntegerType() && !resultType->isPointerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } } else if (resultType->isExtVectorType()) { if (Context.getLangOpts().OpenCL && Context.getLangOpts().getOpenCLCompatibleVersion() < 120) { // OpenCL v1.1 6.3.h: The logical operator not (!) does not // operate on vector float types. QualType T = resultType->castAs()->getElementType(); if (!T->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } // Vector logical not returns the signed variant of the operand type. 
resultType = GetSignedVectorType(resultType); break; } else if (Context.getLangOpts().CPlusPlus && resultType->isVectorType()) { const VectorType *VTy = resultType->castAs(); if (VTy->getVectorKind() != VectorType::GenericVector) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); // Vector logical not returns the signed variant of the operand type. resultType = GetSignedVectorType(resultType); break; } else { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } // LNot always has type int. C99 6.5.3.3p5. // In C++, it's bool. C++ 5.3.1p8 resultType = Context.getLogicalOperationType(); break; case UO_Real: case UO_Imag: resultType = CheckRealImagOperand(*this, Input, OpLoc, Opc == UO_Real); // _Real maps ordinary l-values into ordinary l-values. _Imag maps ordinary // complex l-values to ordinary l-values and all other values to r-values. if (Input.isInvalid()) return ExprError(); if (Opc == UO_Real || Input.get()->getType()->isAnyComplexType()) { if (Input.get()->isGLValue() && Input.get()->getObjectKind() == OK_Ordinary) VK = Input.get()->getValueKind(); } else if (!getLangOpts().CPlusPlus) { // In C, a volatile scalar is read by __imag. In C++, it is not. Input = DefaultLvalueConversion(Input.get()); } break; case UO_Extension: resultType = Input.get()->getType(); VK = Input.get()->getValueKind(); OK = Input.get()->getObjectKind(); break; case UO_Coawait: // It's unnecessary to represent the pass-through operator co_await in the // AST; just return the input expression instead. assert(!Input.get()->getType()->isDependentType() && "the co_await expression must be non-dependant before " "building operator co_await"); return Input; } if (resultType.isNull() || Input.isInvalid()) return ExprError(); // Check for array bounds violations in the operand of the UnaryOperator, // except for the '*' and '&' operators that have to be handled specially // by CheckArrayAccess (as there are special cases like &array[arraysize] // that are explicitly defined as valid by the standard). if (Opc != UO_AddrOf && Opc != UO_Deref) CheckArrayAccess(Input.get()); auto *UO = UnaryOperator::Create(Context, Input.get(), Opc, resultType, VK, OK, OpLoc, CanOverflow, CurFPFeatureOverrides()); if (Opc == UO_Deref && UO->getType()->hasAttr(attr::NoDeref) && !isa(UO->getType().getDesugaredType(Context)) && !isUnevaluatedContext()) ExprEvalContexts.back().PossibleDerefs.insert(UO); // Convert the result back to a half vector. if (ConvertHalfVec) return convertVector(UO, Context.HalfTy, *this); return UO; } /// Determine whether the given expression is a qualified member /// access expression, of a form that could be turned into a pointer to member /// with the address-of operator. bool Sema::isQualifiedMemberAccess(Expr *E) { if (DeclRefExpr *DRE = dyn_cast(E)) { if (!DRE->getQualifier()) return false; ValueDecl *VD = DRE->getDecl(); if (!VD->isCXXClassMember()) return false; if (isa(VD) || isa(VD)) return true; if (CXXMethodDecl *Method = dyn_cast(VD)) return Method->isInstance(); return false; } if (UnresolvedLookupExpr *ULE = dyn_cast(E)) { if (!ULE->getQualifier()) return false; for (NamedDecl *D : ULE->decls()) { if (CXXMethodDecl *Method = dyn_cast(D)) { if (Method->isInstance()) return true; } else { // Overload set does not contain methods. 
break; } } return false; } return false; } ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc, UnaryOperatorKind Opc, Expr *Input) { // First things first: handle placeholders so that the // overloaded-operator check considers the right type. if (const BuiltinType *pty = Input->getType()->getAsPlaceholderType()) { // Increment and decrement of pseudo-object references. if (pty->getKind() == BuiltinType::PseudoObject && UnaryOperator::isIncrementDecrementOp(Opc)) return checkPseudoObjectIncDec(S, OpLoc, Opc, Input); // extension is always a builtin operator. if (Opc == UO_Extension) return CreateBuiltinUnaryOp(OpLoc, Opc, Input); // & gets special logic for several kinds of placeholder. // The builtin code knows what to do. if (Opc == UO_AddrOf && (pty->getKind() == BuiltinType::Overload || pty->getKind() == BuiltinType::UnknownAny || pty->getKind() == BuiltinType::BoundMember)) return CreateBuiltinUnaryOp(OpLoc, Opc, Input); // Anything else needs to be handled now. ExprResult Result = CheckPlaceholderExpr(Input); if (Result.isInvalid()) return ExprError(); Input = Result.get(); } if (getLangOpts().CPlusPlus && Input->getType()->isOverloadableType() && UnaryOperator::getOverloadedOperator(Opc) != OO_None && !(Opc == UO_AddrOf && isQualifiedMemberAccess(Input))) { // Find all of the overloaded operators visible from this point. UnresolvedSet<16> Functions; OverloadedOperatorKind OverOp = UnaryOperator::getOverloadedOperator(Opc); if (S && OverOp != OO_None) LookupOverloadedOperatorName(OverOp, S, Functions); return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input); } return CreateBuiltinUnaryOp(OpLoc, Opc, Input); } // Unary Operators. 'Tok' is the token for the operator. ExprResult Sema::ActOnUnaryOp(Scope *S, SourceLocation OpLoc, tok::TokenKind Op, Expr *Input) { return BuildUnaryOp(S, OpLoc, ConvertTokenKindToUnaryOpcode(Op), Input); } /// ActOnAddrLabel - Parse the GNU address of label extension: "&&foo". ExprResult Sema::ActOnAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc, LabelDecl *TheDecl) { TheDecl->markUsed(Context); // Create the AST node. The address of a label always has type 'void*'. return new (Context) AddrLabelExpr(OpLoc, LabLoc, TheDecl, Context.getPointerType(Context.VoidTy)); } void Sema::ActOnStartStmtExpr() { PushExpressionEvaluationContext(ExprEvalContexts.back().Context); } void Sema::ActOnStmtExprError() { // Note that function is also called by TreeTransform when leaving a // StmtExpr scope without rebuilding anything. DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); } ExprResult Sema::ActOnStmtExpr(Scope *S, SourceLocation LPLoc, Stmt *SubStmt, SourceLocation RPLoc) { return BuildStmtExpr(LPLoc, SubStmt, RPLoc, getTemplateDepth(S)); } ExprResult Sema::BuildStmtExpr(SourceLocation LPLoc, Stmt *SubStmt, SourceLocation RPLoc, unsigned TemplateDepth) { assert(SubStmt && isa(SubStmt) && "Invalid action invocation!"); CompoundStmt *Compound = cast(SubStmt); if (hasAnyUnrecoverableErrorsInThisFunction()) DiscardCleanupsInEvaluationContext(); assert(!Cleanup.exprNeedsCleanups() && "cleanups within StmtExpr not correctly bound!"); PopExpressionEvaluationContext(); // FIXME: there are a variety of strange constraints to enforce here, for // example, it is not possible to goto into a stmt expression apparently. // More semantic analysis is needed. // If there are sub-stmts in the compound stmt, take the type of the last one // as the type of the stmtexpr. 
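  // An illustrative example (not from the original source) of the rule above:
  //   int x = ({ int y = compute(); y + 1; });   // GNU statement expression
  // The type of the statement expression is the type of 'y + 1', i.e. int;
  // if the body is empty or its last non-null statement is not an
  // expression, the type is void instead.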
QualType Ty = Context.VoidTy; bool StmtExprMayBindToTemp = false; if (!Compound->body_empty()) { // For GCC compatibility we get the last Stmt excluding trailing NullStmts. if (const auto *LastStmt = dyn_cast(Compound->getStmtExprResult())) { if (const Expr *Value = LastStmt->getExprStmt()) { StmtExprMayBindToTemp = true; Ty = Value->getType(); } } } // FIXME: Check that expression type is complete/non-abstract; statement // expressions are not lvalues. Expr *ResStmtExpr = new (Context) StmtExpr(Compound, Ty, LPLoc, RPLoc, TemplateDepth); if (StmtExprMayBindToTemp) return MaybeBindToTemporary(ResStmtExpr); return ResStmtExpr; } ExprResult Sema::ActOnStmtExprResult(ExprResult ER) { if (ER.isInvalid()) return ExprError(); // Do function/array conversion on the last expression, but not // lvalue-to-rvalue. However, initialize an unqualified type. ER = DefaultFunctionArrayConversion(ER.get()); if (ER.isInvalid()) return ExprError(); Expr *E = ER.get(); if (E->isTypeDependent()) return E; // In ARC, if the final expression ends in a consume, splice // the consume out and bind it later. In the alternate case // (when dealing with a retainable type), the result // initialization will create a produce. In both cases the // result will be +1, and we'll need to balance that out with // a bind. auto *Cast = dyn_cast(E); if (Cast && Cast->getCastKind() == CK_ARCConsumeObject) return Cast->getSubExpr(); // FIXME: Provide a better location for the initialization. return PerformCopyInitialization( InitializedEntity::InitializeStmtExprResult( E->getBeginLoc(), E->getType().getUnqualifiedType()), SourceLocation(), E); } ExprResult Sema::BuildBuiltinOffsetOf(SourceLocation BuiltinLoc, TypeSourceInfo *TInfo, ArrayRef Components, SourceLocation RParenLoc) { QualType ArgTy = TInfo->getType(); bool Dependent = ArgTy->isDependentType(); SourceRange TypeRange = TInfo->getTypeLoc().getLocalSourceRange(); // We must have at least one component that refers to the type, and the first // one is known to be a field designator. Verify that the ArgTy represents // a struct/union/class. if (!Dependent && !ArgTy->isRecordType()) return ExprError(Diag(BuiltinLoc, diag::err_offsetof_record_type) << ArgTy << TypeRange); // Type must be complete per C99 7.17p3 because a declaring a variable // with an incomplete type would be ill-formed. if (!Dependent && RequireCompleteType(BuiltinLoc, ArgTy, diag::err_offsetof_incomplete_type, TypeRange)) return ExprError(); bool DidWarnAboutNonPOD = false; QualType CurrentType = ArgTy; SmallVector Comps; SmallVector Exprs; for (const OffsetOfComponent &OC : Components) { if (OC.isBrackets) { // Offset of an array sub-field. TODO: Should we allow vector elements? if (!CurrentType->isDependentType()) { const ArrayType *AT = Context.getAsArrayType(CurrentType); if(!AT) return ExprError(Diag(OC.LocEnd, diag::err_offsetof_array_type) << CurrentType); CurrentType = AT->getElementType(); } else CurrentType = Context.DependentTy; ExprResult IdxRval = DefaultLvalueConversion(static_cast(OC.U.E)); if (IdxRval.isInvalid()) return ExprError(); Expr *Idx = IdxRval.get(); // The expression must be an integral expression. // FIXME: An integral constant expression? if (!Idx->isTypeDependent() && !Idx->isValueDependent() && !Idx->getType()->isIntegerType()) return ExprError( Diag(Idx->getBeginLoc(), diag::err_typecheck_subscript_not_integer) << Idx->getSourceRange()); // Record this array index. 
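      // An illustrative use (not from the original source):
      //   struct S { int arr[4]; };
      //   offsetof(struct S, arr[2])   // 'arr' is the field designator and
      //                                // '2' is recorded below as this index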
Comps.push_back(OffsetOfNode(OC.LocStart, Exprs.size(), OC.LocEnd)); Exprs.push_back(Idx); continue; } // Offset of a field. if (CurrentType->isDependentType()) { // We have the offset of a field, but we can't look into the dependent // type. Just record the identifier of the field. Comps.push_back(OffsetOfNode(OC.LocStart, OC.U.IdentInfo, OC.LocEnd)); CurrentType = Context.DependentTy; continue; } // We need to have a complete type to look into. if (RequireCompleteType(OC.LocStart, CurrentType, diag::err_offsetof_incomplete_type)) return ExprError(); // Look for the designated field. const RecordType *RC = CurrentType->getAs(); if (!RC) return ExprError(Diag(OC.LocEnd, diag::err_offsetof_record_type) << CurrentType); RecordDecl *RD = RC->getDecl(); // C++ [lib.support.types]p5: // The macro offsetof accepts a restricted set of type arguments in this // International Standard. type shall be a POD structure or a POD union // (clause 9). // C++11 [support.types]p4: // If type is not a standard-layout class (Clause 9), the results are // undefined. if (CXXRecordDecl *CRD = dyn_cast(RD)) { bool IsSafe = LangOpts.CPlusPlus11? CRD->isStandardLayout() : CRD->isPOD(); unsigned DiagID = LangOpts.CPlusPlus11? diag::ext_offsetof_non_standardlayout_type : diag::ext_offsetof_non_pod_type; if (!IsSafe && !DidWarnAboutNonPOD && DiagRuntimeBehavior(BuiltinLoc, nullptr, PDiag(DiagID) << SourceRange(Components[0].LocStart, OC.LocEnd) << CurrentType)) DidWarnAboutNonPOD = true; } // Look for the field. LookupResult R(*this, OC.U.IdentInfo, OC.LocStart, LookupMemberName); LookupQualifiedName(R, RD); FieldDecl *MemberDecl = R.getAsSingle(); IndirectFieldDecl *IndirectMemberDecl = nullptr; if (!MemberDecl) { if ((IndirectMemberDecl = R.getAsSingle())) MemberDecl = IndirectMemberDecl->getAnonField(); } if (!MemberDecl) return ExprError(Diag(BuiltinLoc, diag::err_no_member) << OC.U.IdentInfo << RD << SourceRange(OC.LocStart, OC.LocEnd)); // C99 7.17p3: // (If the specified member is a bit-field, the behavior is undefined.) // // We diagnose this as an error. if (MemberDecl->isBitField()) { Diag(OC.LocEnd, diag::err_offsetof_bitfield) << MemberDecl->getDeclName() << SourceRange(BuiltinLoc, RParenLoc); Diag(MemberDecl->getLocation(), diag::note_bitfield_decl); return ExprError(); } RecordDecl *Parent = MemberDecl->getParent(); if (IndirectMemberDecl) Parent = cast(IndirectMemberDecl->getDeclContext()); // If the member was found in a base class, introduce OffsetOfNodes for // the base class indirections. 
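// Illustrative sketch (not from this file): the two cases diagnosed above.
// offsetof of a bit-field is an error, and offsetof into a C++ class that is
// not standard-layout (not POD before C++11) is only a conditional extension.
// Names are invented.
#if 0
struct Packed {
  unsigned flag : 1;  // __builtin_offsetof(Packed, flag) -> err_offsetof_bitfield
  unsigned rest;
};

struct NotStandardLayout {
  virtual ~NotStandardLayout();
  int field;          // __builtin_offsetof(NotStandardLayout, field)
                      //   -> ext_offsetof_non_standardlayout_type
};
#endif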
CXXBasePaths Paths; if (IsDerivedFrom(OC.LocStart, CurrentType, Context.getTypeDeclType(Parent), Paths)) { if (Paths.getDetectedVirtual()) { Diag(OC.LocEnd, diag::err_offsetof_field_of_virtual_base) << MemberDecl->getDeclName() << SourceRange(BuiltinLoc, RParenLoc); return ExprError(); } CXXBasePath &Path = Paths.front(); for (const CXXBasePathElement &B : Path) Comps.push_back(OffsetOfNode(B.Base)); } if (IndirectMemberDecl) { for (auto *FI : IndirectMemberDecl->chain()) { assert(isa(FI)); Comps.push_back(OffsetOfNode(OC.LocStart, cast(FI), OC.LocEnd)); } } else Comps.push_back(OffsetOfNode(OC.LocStart, MemberDecl, OC.LocEnd)); CurrentType = MemberDecl->getType().getNonReferenceType(); } return OffsetOfExpr::Create(Context, Context.getSizeType(), BuiltinLoc, TInfo, Comps, Exprs, RParenLoc); } ExprResult Sema::ActOnBuiltinOffsetOf(Scope *S, SourceLocation BuiltinLoc, SourceLocation TypeLoc, ParsedType ParsedArgTy, ArrayRef Components, SourceLocation RParenLoc) { TypeSourceInfo *ArgTInfo; QualType ArgTy = GetTypeFromParser(ParsedArgTy, &ArgTInfo); if (ArgTy.isNull()) return ExprError(); if (!ArgTInfo) ArgTInfo = Context.getTrivialTypeSourceInfo(ArgTy, TypeLoc); return BuildBuiltinOffsetOf(BuiltinLoc, ArgTInfo, Components, RParenLoc); } ExprResult Sema::ActOnChooseExpr(SourceLocation BuiltinLoc, Expr *CondExpr, Expr *LHSExpr, Expr *RHSExpr, SourceLocation RPLoc) { assert((CondExpr && LHSExpr && RHSExpr) && "Missing type argument(s)"); ExprValueKind VK = VK_PRValue; ExprObjectKind OK = OK_Ordinary; QualType resType; bool CondIsTrue = false; if (CondExpr->isTypeDependent() || CondExpr->isValueDependent()) { resType = Context.DependentTy; } else { // The conditional expression is required to be a constant expression. llvm::APSInt condEval(32); ExprResult CondICE = VerifyIntegerConstantExpression( CondExpr, &condEval, diag::err_typecheck_choose_expr_requires_constant); if (CondICE.isInvalid()) return ExprError(); CondExpr = CondICE.get(); CondIsTrue = condEval.getZExtValue(); // If the condition is > zero, then the AST type is the same as the LHSExpr. Expr *ActiveExpr = CondIsTrue ? LHSExpr : RHSExpr; resType = ActiveExpr->getType(); VK = ActiveExpr->getValueKind(); OK = ActiveExpr->getObjectKind(); } return new (Context) ChooseExpr(BuiltinLoc, CondExpr, LHSExpr, RHSExpr, resType, VK, OK, RPLoc, CondIsTrue); } //===----------------------------------------------------------------------===// // Clang Extensions. //===----------------------------------------------------------------------===// /// ActOnBlockStart - This callback is invoked when a block literal is started. void Sema::ActOnBlockStart(SourceLocation CaretLoc, Scope *CurScope) { BlockDecl *Block = BlockDecl::Create(Context, CurContext, CaretLoc); if (LangOpts.CPlusPlus) { MangleNumberingContext *MCtx; Decl *ManglingContextDecl; std::tie(MCtx, ManglingContextDecl) = getCurrentMangleNumberContext(Block->getDeclContext()); if (MCtx) { unsigned ManglingNumber = MCtx->getManglingNumber(Block); Block->setBlockMangling(ManglingNumber, ManglingContextDecl); } } PushBlockScope(CurScope, Block); CurContext->addDecl(Block); if (CurScope) PushDeclContext(CurScope, Block); else CurContext = Block; getCurBlock()->HasImplicitReturnType = true; // Enter a new evaluation context to insulate the block from any // cleanups from the enclosing full-expression. 
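// Illustrative sketch (not from this file): __builtin_choose_expr as handled
// by ActOnChooseExpr above; the constant condition picks both the value and
// the type of the result. Compile as C, where the builtin is available.
#if 0
#include <stdio.h>

int main(void) {
  // The condition is a nonzero constant, so the whole expression has the
  // type and value of the second operand; the third is never evaluated.
  double d = __builtin_choose_expr(1, 3.5, "never evaluated");
  printf("%f\n", d);
  return 0;
}
#endif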
PushExpressionEvaluationContext( ExpressionEvaluationContext::PotentiallyEvaluated); } void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo, Scope *CurScope) { assert(ParamInfo.getIdentifier() == nullptr && "block-id should have no identifier!"); assert(ParamInfo.getContext() == DeclaratorContext::BlockLiteral); BlockScopeInfo *CurBlock = getCurBlock(); TypeSourceInfo *Sig = GetTypeForDeclarator(ParamInfo, CurScope); QualType T = Sig->getType(); // FIXME: We should allow unexpanded parameter packs here, but that would, // in turn, make the block expression contain unexpanded parameter packs. if (DiagnoseUnexpandedParameterPack(CaretLoc, Sig, UPPC_Block)) { // Drop the parameters. FunctionProtoType::ExtProtoInfo EPI; EPI.HasTrailingReturn = false; EPI.TypeQuals.addConst(); T = Context.getFunctionType(Context.DependentTy, None, EPI); Sig = Context.getTrivialTypeSourceInfo(T); } // GetTypeForDeclarator always produces a function type for a block // literal signature. Furthermore, it is always a FunctionProtoType // unless the function was written with a typedef. assert(T->isFunctionType() && "GetTypeForDeclarator made a non-function block signature"); // Look for an explicit signature in that function type. FunctionProtoTypeLoc ExplicitSignature; if ((ExplicitSignature = Sig->getTypeLoc() .getAsAdjusted())) { // Check whether that explicit signature was synthesized by // GetTypeForDeclarator. If so, don't save that as part of the // written signature. if (ExplicitSignature.getLocalRangeBegin() == ExplicitSignature.getLocalRangeEnd()) { // This would be much cheaper if we stored TypeLocs instead of // TypeSourceInfos. TypeLoc Result = ExplicitSignature.getReturnLoc(); unsigned Size = Result.getFullDataSize(); Sig = Context.CreateTypeSourceInfo(Result.getType(), Size); Sig->getTypeLoc().initializeFullCopy(Result, Size); ExplicitSignature = FunctionProtoTypeLoc(); } } CurBlock->TheDecl->setSignatureAsWritten(Sig); CurBlock->FunctionType = T; const auto *Fn = T->castAs(); QualType RetTy = Fn->getReturnType(); bool isVariadic = (isa(Fn) && cast(Fn)->isVariadic()); CurBlock->TheDecl->setIsVariadic(isVariadic); // Context.DependentTy is used as a placeholder for a missing block // return type. TODO: what should we do with declarators like: // ^ * { ... } // If the answer is "apply template argument deduction".... if (RetTy != Context.DependentTy) { CurBlock->ReturnType = RetTy; CurBlock->TheDecl->setBlockMissingReturnType(false); CurBlock->HasImplicitReturnType = false; } // Push block parameters from the declarator if we had them. SmallVector Params; if (ExplicitSignature) { for (unsigned I = 0, E = ExplicitSignature.getNumParams(); I != E; ++I) { ParmVarDecl *Param = ExplicitSignature.getParam(I); if (Param->getIdentifier() == nullptr && !Param->isImplicit() && !Param->isInvalidDecl() && !getLangOpts().CPlusPlus) { // Diagnose this as an extension in C17 and earlier. if (!getLangOpts().C2x) Diag(Param->getLocation(), diag::ext_parameter_name_omitted_c2x); } Params.push_back(Param); } // Fake up parameter variables if we have a typedef, like // ^ fntype { ... } } else if (const FunctionProtoType *Fn = T->getAs()) { for (const auto &I : Fn->param_types()) { ParmVarDecl *Param = BuildParmVarDeclForTypedef( CurBlock->TheDecl, ParamInfo.getBeginLoc(), I); Params.push_back(Param); } } // Set the parameters on the block decl. 
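// Illustrative sketch (not from this file): the user-level block literals
// whose signatures ActOnBlockArguments above picks apart (requires -fblocks).
// Names are invented.
#if 0
void demo(void) {
  // Explicit signature: the parameters come straight from the declarator.
  int (^add)(int, int) = ^(int a, int b) { return a + b; };

  // No written return type: it is deduced from the return statements via the
  // HasImplicitReturnType path set up in ActOnBlockStart.
  float (^half)(float) = ^(float f) { return f / 2; };

  (void)add(1, 2);
  (void)half(3.0f);
}
#endif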
if (!Params.empty()) { CurBlock->TheDecl->setParams(Params); CheckParmsForFunctionDef(CurBlock->TheDecl->parameters(), /*CheckParameterNames=*/false); } // Finally we can process decl attributes. ProcessDeclAttributes(CurScope, CurBlock->TheDecl, ParamInfo); // Put the parameter variables in scope. for (auto AI : CurBlock->TheDecl->parameters()) { AI->setOwningFunction(CurBlock->TheDecl); // If this has an identifier, add it to the scope stack. if (AI->getIdentifier()) { CheckShadow(CurBlock->TheScope, AI); PushOnScopeChains(AI, CurBlock->TheScope); } } } /// ActOnBlockError - If there is an error parsing a block, this callback /// is invoked to pop the information about the block from the action impl. void Sema::ActOnBlockError(SourceLocation CaretLoc, Scope *CurScope) { // Leave the expression-evaluation context. DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); // Pop off CurBlock, handle nested blocks. PopDeclContext(); PopFunctionScopeInfo(); } /// ActOnBlockStmtExpr - This is called when the body of a block statement /// literal was successfully completed. ^(int x){...} ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc, Stmt *Body, Scope *CurScope) { // If blocks are disabled, emit an error. if (!LangOpts.Blocks) Diag(CaretLoc, diag::err_blocks_disable) << LangOpts.OpenCL; // Leave the expression-evaluation context. if (hasAnyUnrecoverableErrorsInThisFunction()) DiscardCleanupsInEvaluationContext(); assert(!Cleanup.exprNeedsCleanups() && "cleanups within block not correctly bound!"); PopExpressionEvaluationContext(); BlockScopeInfo *BSI = cast(FunctionScopes.back()); BlockDecl *BD = BSI->TheDecl; if (BSI->HasImplicitReturnType) deduceClosureReturnType(*BSI); QualType RetTy = Context.VoidTy; if (!BSI->ReturnType.isNull()) RetTy = BSI->ReturnType; bool NoReturn = BD->hasAttr(); QualType BlockTy; // If the user wrote a function type in some form, try to use that. if (!BSI->FunctionType.isNull()) { const FunctionType *FTy = BSI->FunctionType->castAs(); FunctionType::ExtInfo Ext = FTy->getExtInfo(); if (NoReturn && !Ext.getNoReturn()) Ext = Ext.withNoReturn(true); // Turn protoless block types into nullary block types. if (isa(FTy)) { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, None, EPI); // Otherwise, if we don't need to change anything about the function type, // preserve its sugar structure. } else if (FTy->getReturnType() == RetTy && (!NoReturn || FTy->getNoReturnAttr())) { BlockTy = BSI->FunctionType; // Otherwise, make the minimal modifications to the function type. } else { const FunctionProtoType *FPT = cast(FTy); FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); EPI.TypeQuals = Qualifiers(); EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, FPT->getParamTypes(), EPI); } // If we don't have a function type, just build one from nothing. } else { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = FunctionType::ExtInfo().withNoReturn(NoReturn); BlockTy = Context.getFunctionType(RetTy, None, EPI); } DiagnoseUnusedParameters(BD->parameters()); BlockTy = Context.getBlockPointerType(BlockTy); // If needed, diagnose invalid gotos and switches in the block. if (getCurFunction()->NeedsScopeChecking() && !PP.isCodeCompletionEnabled()) DiagnoseInvalidJumps(cast(Body)); BD->setBody(cast(Body)); if (Body && getCurFunction()->HasPotentialAvailabilityViolations) DiagnoseUnguardedAvailabilityViolations(BD); // Try to apply the named return value optimization. 
We have to check again // if we can do this, though, because blocks keep return statements around // to deduce an implicit return type. if (getLangOpts().CPlusPlus && RetTy->isRecordType() && !BD->isDependentContext()) computeNRVO(Body, BSI); if (RetTy.hasNonTrivialToPrimitiveDestructCUnion() || RetTy.hasNonTrivialToPrimitiveCopyCUnion()) checkNonTrivialCUnion(RetTy, BD->getCaretLocation(), NTCUC_FunctionReturn, NTCUK_Destruct|NTCUK_Copy); PopDeclContext(); // Set the captured variables on the block. SmallVector Captures; for (Capture &Cap : BSI->Captures) { if (Cap.isInvalid() || Cap.isThisCapture()) continue; VarDecl *Var = Cap.getVariable(); Expr *CopyExpr = nullptr; if (getLangOpts().CPlusPlus && Cap.isCopyCapture()) { if (const RecordType *Record = Cap.getCaptureType()->getAs()) { // The capture logic needs the destructor, so make sure we mark it. // Usually this is unnecessary because most local variables have // their destructors marked at declaration time, but parameters are // an exception because it's technically only the call site that // actually requires the destructor. if (isa(Var)) FinalizeVarWithDestructor(Var, Record); // Enter a separate potentially-evaluated context while building block // initializers to isolate their cleanups from those of the block // itself. // FIXME: Is this appropriate even when the block itself occurs in an // unevaluated operand? EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated); SourceLocation Loc = Cap.getLocation(); ExprResult Result = BuildDeclarationNameExpr( CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var); // According to the blocks spec, the capture of a variable from // the stack requires a const copy constructor. This is not true // of the copy/move done to move a __block variable to the heap. if (!Result.isInvalid() && !Result.get()->getType().isConstQualified()) { Result = ImpCastExprToType(Result.get(), Result.get()->getType().withConst(), CK_NoOp, VK_LValue); } if (!Result.isInvalid()) { Result = PerformCopyInitialization( InitializedEntity::InitializeBlock(Var->getLocation(), Cap.getCaptureType()), Loc, Result.get()); } // Build a full-expression copy expression if initialization // succeeded and used a non-trivial constructor. Recover from // errors by pretending that the copy isn't necessary. if (!Result.isInvalid() && !cast(Result.get())->getConstructor() ->isTrivial()) { Result = MaybeCreateExprWithCleanups(Result); CopyExpr = Result.get(); } } } BlockDecl::Capture NewCap(Var, Cap.isBlockCapture(), Cap.isNested(), CopyExpr); Captures.push_back(NewCap); } BD->setCaptures(Context, Captures, BSI->CXXThisCaptureIndex != 0); // Pop the block scope now but keep it alive to the end of this function. AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy(); PoppedFunctionScopePtr ScopeRAII = PopFunctionScopeInfo(&WP, BD, BlockTy); BlockExpr *Result = new (Context) BlockExpr(BD, BlockTy); // If the block isn't obviously global, i.e. it captures anything at // all, then we need to do a few things in the surrounding context: if (Result->getBlockDecl()->hasCaptures()) { // First, this expression has a new cleanup object. ExprCleanupObjects.push_back(Result->getBlockDecl()); Cleanup.setExprNeedsCleanups(true); // It also gets a branch-protected scope if any of the captured // variables needs destruction. 
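// Illustrative sketch (not from this file): the capture behaviour the code
// above implements (requires -fblocks). A plain local is captured by const
// copy when the literal is evaluated; a __block variable stays shared and
// mutable. Names are invented.
#if 0
void demo(void) {
  int copied = 1;
  __block int shared = 1;

  void (^blk)(void) = ^{
    int sum = copied + shared;  // 'copied' is a const copy taken here
    (void)sum;
    shared += 1;                // __block storage is writable from the block
    // copied += 1;             // error: captured by const copy
  };
  blk();
}
#endif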
for (const auto &CI : Result->getBlockDecl()->captures()) { const VarDecl *var = CI.getVariable(); if (var->getType().isDestructedType() != QualType::DK_none) { setFunctionHasBranchProtectedScope(); break; } } } if (getCurFunction()) getCurFunction()->addBlock(BD); return Result; } ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc, Expr *E, ParsedType Ty, SourceLocation RPLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(Ty, &TInfo); return BuildVAArgExpr(BuiltinLoc, E, TInfo, RPLoc); } ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc, Expr *E, TypeSourceInfo *TInfo, SourceLocation RPLoc) { Expr *OrigExpr = E; bool IsMS = false; // CUDA device code does not support varargs. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { if (const FunctionDecl *F = dyn_cast(CurContext)) { CUDAFunctionTarget T = IdentifyCUDATarget(F); if (T == CFT_Global || T == CFT_Device || T == CFT_HostDevice) return ExprError(Diag(E->getBeginLoc(), diag::err_va_arg_in_device)); } } // NVPTX does not support va_arg expression. if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && Context.getTargetInfo().getTriple().isNVPTX()) targetDiag(E->getBeginLoc(), diag::err_va_arg_in_device); // It might be a __builtin_ms_va_list. (But don't ever mark a va_arg() // as Microsoft ABI on an actual Microsoft platform, where // __builtin_ms_va_list and __builtin_va_list are the same.) if (!E->isTypeDependent() && Context.getTargetInfo().hasBuiltinMSVaList() && Context.getTargetInfo().getBuiltinVaListKind() != TargetInfo::CharPtrBuiltinVaList) { QualType MSVaListType = Context.getBuiltinMSVaListType(); if (Context.hasSameType(MSVaListType, E->getType())) { if (CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); IsMS = true; } } // Get the va_list type QualType VaListType = Context.getBuiltinVaListType(); if (!IsMS) { if (VaListType->isArrayType()) { // Deal with implicit array decay; for example, on x86-64, // va_list is an array, but it's supposed to decay to // a pointer for va_arg. VaListType = Context.getArrayDecayedType(VaListType); // Make sure the input expression also decays appropriately. ExprResult Result = UsualUnaryConversions(E); if (Result.isInvalid()) return ExprError(); E = Result.get(); } else if (VaListType->isRecordType() && getLangOpts().CPlusPlus) { // If va_list is a record type and we are compiling in C++ mode, // check the argument using reference binding. InitializedEntity Entity = InitializedEntity::InitializeParameter( Context, Context.getLValueReferenceType(VaListType), false); ExprResult Init = PerformCopyInitialization(Entity, SourceLocation(), E); if (Init.isInvalid()) return ExprError(); E = Init.getAs(); } else { // Otherwise, the va_list argument must be an l-value because // it is modified by va_arg. 
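// Illustrative sketch (not from this file): the user-level va_arg expression
// BuildVAArgExpr above is checking. On most targets va_list is an array type
// and decays to a pointer exactly as handled above. Names are invented.
#if 0
#include <stdarg.h>

int sum(int count, ...) {
  va_list ap;
  va_start(ap, count);
  int total = 0;
  for (int i = 0; i < count; ++i)
    total += va_arg(ap, int);  // 'ap' is the expression E checked above
  // char c  = va_arg(ap, char);   // diagnosed further down: promoted to int
  // float f = va_arg(ap, float);  // diagnosed further down: promoted to double
  va_end(ap);
  return total;
}
#endif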
if (!E->isTypeDependent() && CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); } } if (!IsMS && !E->isTypeDependent() && !Context.hasSameType(VaListType, E->getType())) return ExprError( Diag(E->getBeginLoc(), diag::err_first_argument_to_va_arg_not_of_type_va_list) << OrigExpr->getType() << E->getSourceRange()); if (!TInfo->getType()->isDependentType()) { if (RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(), diag::err_second_parameter_to_va_arg_incomplete, TInfo->getTypeLoc())) return ExprError(); if (RequireNonAbstractType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(), diag::err_second_parameter_to_va_arg_abstract, TInfo->getTypeLoc())) return ExprError(); if (!TInfo->getType().isPODType(Context)) { Diag(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType()->isObjCLifetimeType() ? diag::warn_second_parameter_to_va_arg_ownership_qualified : diag::warn_second_parameter_to_va_arg_not_pod) << TInfo->getType() << TInfo->getTypeLoc().getSourceRange(); } // Check for va_arg where arguments of the given type will be promoted // (i.e. this va_arg is guaranteed to have undefined behavior). QualType PromoteType; if (TInfo->getType()->isPromotableIntegerType()) { PromoteType = Context.getPromotedIntegerType(TInfo->getType()); // [cstdarg.syn]p1 defers the C++ behavior to what the C standard says, // and C2x 7.16.1.1p2 says, in part: // If type is not compatible with the type of the actual next argument // (as promoted according to the default argument promotions), the // behavior is undefined, except for the following cases: // - both types are pointers to qualified or unqualified versions of // compatible types; // - one type is a signed integer type, the other type is the // corresponding unsigned integer type, and the value is // representable in both types; // - one type is pointer to qualified or unqualified void and the // other is a pointer to a qualified or unqualified character type. // Given that type compatibility is the primary requirement (ignoring // qualifications), you would think we could call typesAreCompatible() // directly to test this. However, in C++, that checks for *same type*, // which causes false positives when passing an enumeration type to // va_arg. Instead, get the underlying type of the enumeration and pass // that. QualType UnderlyingType = TInfo->getType(); if (const auto *ET = UnderlyingType->getAs()) UnderlyingType = ET->getDecl()->getIntegerType(); if (Context.typesAreCompatible(PromoteType, UnderlyingType, /*CompareUnqualified*/ true)) PromoteType = QualType(); // If the types are still not compatible, we need to test whether the // promoted type and the underlying type are the same except for // signedness. Ask the AST for the correctly corresponding type and see // if that's compatible. if (!PromoteType.isNull() && !UnderlyingType->isBooleanType() && PromoteType->isUnsignedIntegerType() != UnderlyingType->isUnsignedIntegerType()) { UnderlyingType = UnderlyingType->isUnsignedIntegerType() ? 
Context.getCorrespondingSignedType(UnderlyingType) : Context.getCorrespondingUnsignedType(UnderlyingType); if (Context.typesAreCompatible(PromoteType, UnderlyingType, /*CompareUnqualified*/ true)) PromoteType = QualType(); } } if (TInfo->getType()->isSpecificBuiltinType(BuiltinType::Float)) PromoteType = Context.DoubleTy; if (!PromoteType.isNull()) DiagRuntimeBehavior(TInfo->getTypeLoc().getBeginLoc(), E, PDiag(diag::warn_second_parameter_to_va_arg_never_compatible) << TInfo->getType() << PromoteType << TInfo->getTypeLoc().getSourceRange()); } QualType T = TInfo->getType().getNonLValueExprType(Context); return new (Context) VAArgExpr(BuiltinLoc, E, TInfo, RPLoc, T, IsMS); } ExprResult Sema::ActOnGNUNullExpr(SourceLocation TokenLoc) { // The type of __null will be int or long, depending on the size of // pointers on the target. QualType Ty; unsigned pw = Context.getTargetInfo().getPointerWidth(0); if (pw == Context.getTargetInfo().getIntWidth()) Ty = Context.IntTy; else if (pw == Context.getTargetInfo().getLongWidth()) Ty = Context.LongTy; else if (pw == Context.getTargetInfo().getLongLongWidth()) Ty = Context.LongLongTy; else { llvm_unreachable("I don't know size of pointer!"); } return new (Context) GNUNullExpr(Ty, TokenLoc); } static CXXRecordDecl *LookupStdSourceLocationImpl(Sema &S, SourceLocation Loc) { CXXRecordDecl *ImplDecl = nullptr; // Fetch the std::source_location::__impl decl. if (NamespaceDecl *Std = S.getStdNamespace()) { LookupResult ResultSL(S, &S.PP.getIdentifierTable().get("source_location"), Loc, Sema::LookupOrdinaryName); if (S.LookupQualifiedName(ResultSL, Std)) { if (auto *SLDecl = ResultSL.getAsSingle()) { LookupResult ResultImpl(S, &S.PP.getIdentifierTable().get("__impl"), Loc, Sema::LookupOrdinaryName); if ((SLDecl->isCompleteDefinition() || SLDecl->isBeingDefined()) && S.LookupQualifiedName(ResultImpl, SLDecl)) { ImplDecl = ResultImpl.getAsSingle(); } } } } if (!ImplDecl || !ImplDecl->isCompleteDefinition()) { S.Diag(Loc, diag::err_std_source_location_impl_not_found); return nullptr; } // Verify that __impl is a trivial struct type, with no base classes, and with // only the four expected fields. 
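// Illustrative sketch (not from this file): a std::source_location::__impl
// with the shape the checks below accept; roughly what libstdc++'s
// <source_location> header declares. The exact field names are required.
#if 0
namespace std {
struct source_location {
  struct __impl {
    const char *_M_file_name;
    const char *_M_function_name;
    unsigned _M_line;
    unsigned _M_column;
  };
};
} // namespace std
#endif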
if (ImplDecl->isUnion() || !ImplDecl->isStandardLayout() || ImplDecl->getNumBases() != 0) { S.Diag(Loc, diag::err_std_source_location_impl_malformed); return nullptr; } unsigned Count = 0; for (FieldDecl *F : ImplDecl->fields()) { StringRef Name = F->getName(); if (Name == "_M_file_name") { if (F->getType() != S.Context.getPointerType(S.Context.CharTy.withConst())) break; Count++; } else if (Name == "_M_function_name") { if (F->getType() != S.Context.getPointerType(S.Context.CharTy.withConst())) break; Count++; } else if (Name == "_M_line") { if (!F->getType()->isIntegerType()) break; Count++; } else if (Name == "_M_column") { if (!F->getType()->isIntegerType()) break; Count++; } else { Count = 100; // invalid break; } } if (Count != 4) { S.Diag(Loc, diag::err_std_source_location_impl_malformed); return nullptr; } return ImplDecl; } ExprResult Sema::ActOnSourceLocExpr(SourceLocExpr::IdentKind Kind, SourceLocation BuiltinLoc, SourceLocation RPLoc) { QualType ResultTy; switch (Kind) { case SourceLocExpr::File: case SourceLocExpr::Function: { QualType ArrTy = Context.getStringLiteralArrayType(Context.CharTy, 0); ResultTy = Context.getPointerType(ArrTy->getAsArrayTypeUnsafe()->getElementType()); break; } case SourceLocExpr::Line: case SourceLocExpr::Column: ResultTy = Context.UnsignedIntTy; break; case SourceLocExpr::SourceLocStruct: if (!StdSourceLocationImplDecl) { StdSourceLocationImplDecl = LookupStdSourceLocationImpl(*this, BuiltinLoc); if (!StdSourceLocationImplDecl) return ExprError(); } ResultTy = Context.getPointerType( Context.getRecordType(StdSourceLocationImplDecl).withConst()); break; } return BuildSourceLocExpr(Kind, ResultTy, BuiltinLoc, RPLoc, CurContext); } ExprResult Sema::BuildSourceLocExpr(SourceLocExpr::IdentKind Kind, QualType ResultTy, SourceLocation BuiltinLoc, SourceLocation RPLoc, DeclContext *ParentContext) { return new (Context) SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext); } bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp, bool Diagnose) { if (!getLangOpts().ObjC) return false; const ObjCObjectPointerType *PT = DstType->getAs(); if (!PT) return false; const ObjCInterfaceDecl *ID = PT->getInterfaceDecl(); // Ignore any parens, implicit casts (should only be // array-to-pointer decays), and not-so-opaque values. The last is // important for making this trigger for property assignments. 
Expr *SrcExpr = Exp->IgnoreParenImpCasts(); if (OpaqueValueExpr *OV = dyn_cast(SrcExpr)) if (OV->getSourceExpr()) SrcExpr = OV->getSourceExpr()->IgnoreParenImpCasts(); if (auto *SL = dyn_cast(SrcExpr)) { if (!PT->isObjCIdType() && !(ID && ID->getIdentifier()->isStr("NSString"))) return false; if (!SL->isOrdinary()) return false; if (Diagnose) { Diag(SL->getBeginLoc(), diag::err_missing_atsign_prefix) << /*string*/0 << FixItHint::CreateInsertion(SL->getBeginLoc(), "@"); Exp = BuildObjCStringLiteral(SL->getBeginLoc(), SL).get(); } return true; } if ((isa(SrcExpr) || isa(SrcExpr) || isa(SrcExpr) || isa(SrcExpr) || isa(SrcExpr)) && !SrcExpr->isNullPointerConstant( getASTContext(), Expr::NPC_NeverValueDependent)) { if (!ID || !ID->getIdentifier()->isStr("NSNumber")) return false; if (Diagnose) { Diag(SrcExpr->getBeginLoc(), diag::err_missing_atsign_prefix) << /*number*/1 << FixItHint::CreateInsertion(SrcExpr->getBeginLoc(), "@"); Expr *NumLit = BuildObjCNumericLiteral(SrcExpr->getBeginLoc(), SrcExpr).get(); if (NumLit) Exp = NumLit; } return true; } return false; } static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType, const Expr *SrcExpr) { if (!DstType->isFunctionPointerType() || !SrcExpr->getType()->isFunctionType()) return false; auto *DRE = dyn_cast(SrcExpr->IgnoreParenImpCasts()); if (!DRE) return false; auto *FD = dyn_cast(DRE->getDecl()); if (!FD) return false; return !S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, SrcExpr->getBeginLoc()); } bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy, SourceLocation Loc, QualType DstType, QualType SrcType, Expr *SrcExpr, AssignmentAction Action, bool *Complained) { if (Complained) *Complained = false; // Decode the result (notice that AST's are still created for extensions). 
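// Illustrative sketch (not from this file) of the recoveries implemented in
// CheckConversionToObjCLiteral above, shown as Objective-C in comments:
//   NSString *s = "hello";  // fix-it inserts '@' and rebuilds it as @"hello"
//   NSNumber *n = 42;       // fix-it inserts '@' and rebuilds it as @42
// The string case fires for 'id' or NSString destinations; the numeric case
// only for NSNumber, and only when the literal is not a null pointer constant.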
bool CheckInferredResultType = false; bool isInvalid = false; unsigned DiagKind = 0; ConversionFixItGenerator ConvHints; bool MayHaveConvFixit = false; bool MayHaveFunctionDiff = false; const ObjCInterfaceDecl *IFace = nullptr; const ObjCProtocolDecl *PDecl = nullptr; switch (ConvTy) { case Compatible: DiagnoseAssignmentEnum(DstType, SrcType, SrcExpr); return false; case PointerToInt: if (getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_pointer_int; isInvalid = true; } else { DiagKind = diag::ext_typecheck_convert_pointer_int; } ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IntToPointer: if (getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_int_pointer; isInvalid = true; } else { DiagKind = diag::ext_typecheck_convert_int_pointer; } ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IncompatibleFunctionPointer: if (getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_incompatible_function_pointer; isInvalid = true; } else { DiagKind = diag::ext_typecheck_convert_incompatible_function_pointer; } ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IncompatiblePointer: if (Action == AA_Passing_CFAudited) { DiagKind = diag::err_arc_typecheck_convert_incompatible_pointer; } else if (getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_incompatible_pointer; isInvalid = true; } else { DiagKind = diag::ext_typecheck_convert_incompatible_pointer; } CheckInferredResultType = DstType->isObjCObjectPointerType() && SrcType->isObjCObjectPointerType(); if (!CheckInferredResultType) { ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); } else if (CheckInferredResultType) { SrcType = SrcType.getUnqualifiedType(); DstType = DstType.getUnqualifiedType(); } MayHaveConvFixit = true; break; case IncompatiblePointerSign: if (getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_incompatible_pointer_sign; isInvalid = true; } else { DiagKind = diag::ext_typecheck_convert_incompatible_pointer_sign; } break; case FunctionVoidPointer: if (getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_pointer_void_func; isInvalid = true; } else { DiagKind = diag::ext_typecheck_convert_pointer_void_func; } break; case IncompatiblePointerDiscardsQualifiers: { // Perform array-to-pointer decay if necessary. if (SrcType->isArrayType()) SrcType = Context.getArrayDecayedType(SrcType); isInvalid = true; Qualifiers lhq = SrcType->getPointeeType().getQualifiers(); Qualifiers rhq = DstType->getPointeeType().getQualifiers(); if (lhq.getAddressSpace() != rhq.getAddressSpace()) { DiagKind = diag::err_typecheck_incompatible_address_space; break; } else if (lhq.getObjCLifetime() != rhq.getObjCLifetime()) { DiagKind = diag::err_typecheck_incompatible_ownership; break; } llvm_unreachable("unknown error case for discarding qualifiers!"); // fallthrough } case CompatiblePointerDiscardsQualifiers: // If the qualifiers lost were because we were applying the // (deprecated) C++ conversion from a string literal to a char* // (or wchar_t*), then there was no error (C++ 4.2p2). FIXME: // Ideally, this check would be performed in // checkPointerTypesForAssignment. However, that would require a // bit of refactoring (so that the second argument is an // expression, rather than a type), which should be done as part // of a larger effort to fix checkPointerTypesForAssignment for // C++ semantics. 
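// Illustrative sketch (not from this file): the language split encoded in the
// cases above. The same conversions pick an ext_* diagnostic in C but an
// err_* diagnostic in C++. Names are invented.
#if 0
void demo(void) {
  int n = 0;
  int *p = n;  // C: ext_typecheck_convert_int_pointer
               // C++: err_typecheck_convert_int_pointer
  long q = 0;
  q = p;       // C: ext_typecheck_convert_pointer_int
               // C++: err_typecheck_convert_pointer_int
  (void)p; (void)q;
}
#endif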
if (getLangOpts().CPlusPlus && IsStringLiteralToNonConstPointerConversion(SrcExpr, DstType)) return false; if (getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_discards_qualifiers; isInvalid = true; } else { DiagKind = diag::ext_typecheck_convert_discards_qualifiers; } break; case IncompatibleNestedPointerQualifiers: if (getLangOpts().CPlusPlus) { isInvalid = true; DiagKind = diag::err_nested_pointer_qualifier_mismatch; } else { DiagKind = diag::ext_nested_pointer_qualifier_mismatch; } break; case IncompatibleNestedPointerAddressSpaceMismatch: DiagKind = diag::err_typecheck_incompatible_nested_address_space; isInvalid = true; break; case IntToBlockPointer: DiagKind = diag::err_int_to_block_pointer; isInvalid = true; break; case IncompatibleBlockPointer: DiagKind = diag::err_typecheck_convert_incompatible_block_pointer; isInvalid = true; break; case IncompatibleObjCQualifiedId: { if (SrcType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *srcOPT = SrcType->castAs(); for (auto *srcProto : srcOPT->quals()) { PDecl = srcProto; break; } if (const ObjCInterfaceType *IFaceT = DstType->castAs()->getInterfaceType()) IFace = IFaceT->getDecl(); } else if (DstType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *dstOPT = DstType->castAs(); for (auto *dstProto : dstOPT->quals()) { PDecl = dstProto; break; } if (const ObjCInterfaceType *IFaceT = SrcType->castAs()->getInterfaceType()) IFace = IFaceT->getDecl(); } if (getLangOpts().CPlusPlus) { DiagKind = diag::err_incompatible_qualified_id; isInvalid = true; } else { DiagKind = diag::warn_incompatible_qualified_id; } break; } case IncompatibleVectors: if (getLangOpts().CPlusPlus) { DiagKind = diag::err_incompatible_vectors; isInvalid = true; } else { DiagKind = diag::warn_incompatible_vectors; } break; case IncompatibleObjCWeakRef: DiagKind = diag::err_arc_weak_unavailable_assign; isInvalid = true; break; case Incompatible: if (maybeDiagnoseAssignmentToFunction(*this, DstType, SrcExpr)) { if (Complained) *Complained = true; return true; } DiagKind = diag::err_typecheck_convert_incompatible; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; isInvalid = true; MayHaveFunctionDiff = true; break; } QualType FirstType, SecondType; switch (Action) { case AA_Assigning: case AA_Initializing: // The destination type comes first. FirstType = DstType; SecondType = SrcType; break; case AA_Returning: case AA_Passing: case AA_Passing_CFAudited: case AA_Converting: case AA_Sending: case AA_Casting: // The source type comes first. FirstType = SrcType; SecondType = DstType; break; } PartialDiagnostic FDiag = PDiag(DiagKind); AssignmentAction ActionForDiag = Action; if (Action == AA_Passing_CFAudited) ActionForDiag = AA_Passing; FDiag << FirstType << SecondType << ActionForDiag << SrcExpr->getSourceRange(); if (DiagKind == diag::ext_typecheck_convert_incompatible_pointer_sign || DiagKind == diag::err_typecheck_convert_incompatible_pointer_sign) { auto isPlainChar = [](const clang::Type *Type) { return Type->isSpecificBuiltinType(BuiltinType::Char_S) || Type->isSpecificBuiltinType(BuiltinType::Char_U); }; FDiag << (isPlainChar(FirstType->getPointeeOrArrayElementType()) || isPlainChar(SecondType->getPointeeOrArrayElementType())); } // If we can fix the conversion, suggest the FixIts. 
if (!ConvHints.isNull()) { for (FixItHint &H : ConvHints.Hints) FDiag << H; } if (MayHaveConvFixit) { FDiag << (unsigned) (ConvHints.Kind); } if (MayHaveFunctionDiff) HandleFunctionTypeMismatch(FDiag, SecondType, FirstType); Diag(Loc, FDiag); if ((DiagKind == diag::warn_incompatible_qualified_id || DiagKind == diag::err_incompatible_qualified_id) && PDecl && IFace && !IFace->hasDefinition()) Diag(IFace->getLocation(), diag::note_incomplete_class_and_qualified_id) << IFace << PDecl; if (SecondType == Context.OverloadTy) NoteAllOverloadCandidates(OverloadExpr::find(SrcExpr).Expression, FirstType, /*TakingAddress=*/true); if (CheckInferredResultType) EmitRelatedResultTypeNote(SrcExpr); if (Action == AA_Returning && ConvTy == IncompatiblePointer) EmitRelatedResultTypeNoteForReturn(DstType); if (Complained) *Complained = true; return isInvalid; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, AllowFoldKind CanFold) { class SimpleICEDiagnoser : public VerifyICEDiagnoser { public: SemaDiagnosticBuilder diagnoseNotICEType(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_not_integral) << T << S.LangOpts.CPlusPlus; } SemaDiagnosticBuilder diagnoseNotICE(Sema &S, SourceLocation Loc) override { return S.Diag(Loc, diag::err_expr_not_ice) << S.LangOpts.CPlusPlus; } } Diagnoser; return VerifyIntegerConstantExpression(E, Result, Diagnoser, CanFold); } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, unsigned DiagID, AllowFoldKind CanFold) { class IDDiagnoser : public VerifyICEDiagnoser { unsigned DiagID; public: IDDiagnoser(unsigned DiagID) : VerifyICEDiagnoser(DiagID == 0), DiagID(DiagID) { } SemaDiagnosticBuilder diagnoseNotICE(Sema &S, SourceLocation Loc) override { return S.Diag(Loc, DiagID); } } Diagnoser(DiagID); return VerifyIntegerConstantExpression(E, Result, Diagnoser, CanFold); } Sema::SemaDiagnosticBuilder Sema::VerifyICEDiagnoser::diagnoseNotICEType(Sema &S, SourceLocation Loc, QualType T) { return diagnoseNotICE(S, Loc); } Sema::SemaDiagnosticBuilder Sema::VerifyICEDiagnoser::diagnoseFold(Sema &S, SourceLocation Loc) { return S.Diag(Loc, diag::ext_expr_not_ice) << S.LangOpts.CPlusPlus; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, VerifyICEDiagnoser &Diagnoser, AllowFoldKind CanFold) { SourceLocation DiagLoc = E->getBeginLoc(); if (getLangOpts().CPlusPlus11) { // C++11 [expr.const]p5: // If an expression of literal class type is used in a context where an // integral constant expression is required, then that class type shall // have a single non-explicit conversion function to an integral or // unscoped enumeration type ExprResult Converted; class CXX11ConvertDiagnoser : public ICEConvertDiagnoser { VerifyICEDiagnoser &BaseDiagnoser; public: CXX11ConvertDiagnoser(VerifyICEDiagnoser &BaseDiagnoser) : ICEConvertDiagnoser(/*AllowScopedEnumerations*/ false, BaseDiagnoser.Suppress, true), BaseDiagnoser(BaseDiagnoser) {} SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, QualType T) override { return BaseDiagnoser.diagnoseNotICEType(S, Loc, T); } SemaDiagnosticBuilder diagnoseIncomplete( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_incomplete_type) << T; } SemaDiagnosticBuilder diagnoseExplicitConv( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, diag::err_ice_explicit_conversion) << T << ConvTy; } SemaDiagnosticBuilder noteExplicitConv( Sema &S, CXXConversionDecl 
*Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseAmbiguous( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_ambiguous_conversion) << T; } SemaDiagnosticBuilder noteAmbiguous( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseConversion( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { llvm_unreachable("conversion functions are permitted"); } } ConvertDiagnoser(Diagnoser); Converted = PerformContextualImplicitConversion(DiagLoc, E, ConvertDiagnoser); if (Converted.isInvalid()) return Converted; E = Converted.get(); if (!E->getType()->isIntegralOrUnscopedEnumerationType()) return ExprError(); } else if (!E->getType()->isIntegralOrUnscopedEnumerationType()) { // An ICE must be of integral or unscoped enumeration type. if (!Diagnoser.Suppress) Diagnoser.diagnoseNotICEType(*this, DiagLoc, E->getType()) << E->getSourceRange(); return ExprError(); } ExprResult RValueExpr = DefaultLvalueConversion(E); if (RValueExpr.isInvalid()) return ExprError(); E = RValueExpr.get(); // Circumvent ICE checking in C++11 to avoid evaluating the expression twice // in the non-ICE case. if (!getLangOpts().CPlusPlus11 && E->isIntegerConstantExpr(Context)) { if (Result) *Result = E->EvaluateKnownConstIntCheckOverflow(Context); if (!isa(E)) E = Result ? ConstantExpr::Create(Context, E, APValue(*Result)) : ConstantExpr::Create(Context, E); return E; } Expr::EvalResult EvalResult; SmallVector Notes; EvalResult.Diag = &Notes; // Try to evaluate the expression, and produce diagnostics explaining why it's // not a constant expression as a side-effect. bool Folded = E->EvaluateAsRValue(EvalResult, Context, /*isConstantContext*/ true) && EvalResult.Val.isInt() && !EvalResult.HasSideEffects; if (!isa(E)) E = ConstantExpr::Create(Context, E, EvalResult.Val); // In C++11, we can rely on diagnostics being produced for any expression // which is not a constant expression. If no diagnostics were produced, then // this is a constant expression. if (Folded && getLangOpts().CPlusPlus11 && Notes.empty()) { if (Result) *Result = EvalResult.Val.getInt(); return E; } // If our only note is the usual "invalid subexpression" note, just point // the caret at its location rather than producing an essentially // redundant note. if (Notes.size() == 1 && Notes[0].second.getDiagID() == diag::note_invalid_subexpr_in_const_expr) { DiagLoc = Notes[0].first; Notes.clear(); } if (!Folded || !CanFold) { if (!Diagnoser.Suppress) { Diagnoser.diagnoseNotICE(*this, DiagLoc) << E->getSourceRange(); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); } return ExprError(); } Diagnoser.diagnoseFold(*this, DiagLoc) << E->getSourceRange(); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); if (Result) *Result = EvalResult.Val.getInt(); return E; } namespace { // Handle the case where we conclude a expression which we speculatively // considered to be unevaluated is actually evaluated. 
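// Illustrative sketch (not from this file): the C++11 contextual conversion
// handled by VerifyIntegerConstantExpression above. A literal class with a
// single non-explicit constexpr conversion to an integral type is accepted
// where an integral constant expression is required. Names are invented.
#if 0
struct Bound {
  constexpr operator int() const { return 8; }
};

int buffer[Bound{}];       // array bound converted via operator int
enum { kSize = Bound{} };  // enumerator value, same contextual conversion
#endif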
class TransformToPE : public TreeTransform { typedef TreeTransform BaseTransform; public: TransformToPE(Sema &SemaRef) : BaseTransform(SemaRef) { } // Make sure we redo semantic analysis bool AlwaysRebuild() { return true; } bool ReplacingOriginal() { return true; } // We need to special-case DeclRefExprs referring to FieldDecls which // are not part of a member pointer formation; normal TreeTransforming // doesn't catch this case because of the way we represent them in the AST. // FIXME: This is a bit ugly; is it really the best way to handle this // case? // // Error on DeclRefExprs referring to FieldDecls. ExprResult TransformDeclRefExpr(DeclRefExpr *E) { if (isa(E->getDecl()) && !SemaRef.isUnevaluatedContext()) return SemaRef.Diag(E->getLocation(), diag::err_invalid_non_static_member_use) << E->getDecl() << E->getSourceRange(); return BaseTransform::TransformDeclRefExpr(E); } // Exception: filter out member pointer formation ExprResult TransformUnaryOperator(UnaryOperator *E) { if (E->getOpcode() == UO_AddrOf && E->getType()->isMemberPointerType()) return E; return BaseTransform::TransformUnaryOperator(E); } // The body of a lambda-expression is in a separate expression evaluation // context so never needs to be transformed. // FIXME: Ideally we wouldn't transform the closure type either, and would // just recreate the capture expressions and lambda expression. StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body) { return SkipLambdaBody(E, Body); } }; } ExprResult Sema::TransformToPotentiallyEvaluated(Expr *E) { assert(isUnevaluatedContext() && "Should only transform unevaluated expressions"); ExprEvalContexts.back().Context = ExprEvalContexts[ExprEvalContexts.size()-2].Context; if (isUnevaluatedContext()) return E; return TransformToPE(*this).TransformExpr(E); } TypeSourceInfo *Sema::TransformToPotentiallyEvaluated(TypeSourceInfo *TInfo) { assert(isUnevaluatedContext() && "Should only transform unevaluated expressions"); ExprEvalContexts.back().Context = ExprEvalContexts[ExprEvalContexts.size() - 2].Context; if (isUnevaluatedContext()) return TInfo; return TransformToPE(*this).TransformType(TInfo); } void Sema::PushExpressionEvaluationContext( ExpressionEvaluationContext NewContext, Decl *LambdaContextDecl, ExpressionEvaluationContextRecord::ExpressionKind ExprContext) { ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), Cleanup, LambdaContextDecl, ExprContext); // Discarded statements and immediate contexts nested in other // discarded statements or immediate context are themselves // a discarded statement or an immediate context, respectively. 
ExprEvalContexts.back().InDiscardedStatement = ExprEvalContexts[ExprEvalContexts.size() - 2] .isDiscardedStatementContext(); ExprEvalContexts.back().InImmediateFunctionContext = ExprEvalContexts[ExprEvalContexts.size() - 2] .isImmediateFunctionContext(); Cleanup.reset(); if (!MaybeODRUseExprs.empty()) std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs); } void Sema::PushExpressionEvaluationContext( ExpressionEvaluationContext NewContext, ReuseLambdaContextDecl_t, ExpressionEvaluationContextRecord::ExpressionKind ExprContext) { Decl *ClosureContextDecl = ExprEvalContexts.back().ManglingContextDecl; PushExpressionEvaluationContext(NewContext, ClosureContextDecl, ExprContext); } namespace { const DeclRefExpr *CheckPossibleDeref(Sema &S, const Expr *PossibleDeref) { PossibleDeref = PossibleDeref->IgnoreParenImpCasts(); if (const auto *E = dyn_cast(PossibleDeref)) { if (E->getOpcode() == UO_Deref) return CheckPossibleDeref(S, E->getSubExpr()); } else if (const auto *E = dyn_cast(PossibleDeref)) { return CheckPossibleDeref(S, E->getBase()); } else if (const auto *E = dyn_cast(PossibleDeref)) { return CheckPossibleDeref(S, E->getBase()); } else if (const auto E = dyn_cast(PossibleDeref)) { QualType Inner; QualType Ty = E->getType(); if (const auto *Ptr = Ty->getAs()) Inner = Ptr->getPointeeType(); else if (const auto *Arr = S.Context.getAsArrayType(Ty)) Inner = Arr->getElementType(); else return nullptr; if (Inner->hasAttr(attr::NoDeref)) return E; } return nullptr; } } // namespace void Sema::WarnOnPendingNoDerefs(ExpressionEvaluationContextRecord &Rec) { for (const Expr *E : Rec.PossibleDerefs) { const DeclRefExpr *DeclRef = CheckPossibleDeref(*this, E); if (DeclRef) { const ValueDecl *Decl = DeclRef->getDecl(); Diag(E->getExprLoc(), diag::warn_dereference_of_noderef_type) << Decl->getName() << E->getSourceRange(); Diag(Decl->getLocation(), diag::note_previous_decl) << Decl->getName(); } else { Diag(E->getExprLoc(), diag::warn_dereference_of_noderef_type_no_decl) << E->getSourceRange(); } } Rec.PossibleDerefs.clear(); } /// Check whether E, which is either a discarded-value expression or an /// unevaluated operand, is a simple-assignment to a volatlie-qualified lvalue, /// and if so, remove it from the list of volatile-qualified assignments that /// we are going to warn are deprecated. void Sema::CheckUnusedVolatileAssignment(Expr *E) { if (!E->getType().isVolatileQualified() || !getLangOpts().CPlusPlus20) return; // Note: ignoring parens here is not justified by the standard rules, but // ignoring parentheses seems like a more reasonable approach, and this only // drives a deprecation warning so doesn't affect conformance. if (auto *BO = dyn_cast(E->IgnoreParenImpCasts())) { if (BO->getOpcode() == BO_Assign) { auto &LHSs = ExprEvalContexts.back().VolatileAssignmentLHSs; llvm::erase_value(LHSs, BO->getLHS()); } } } ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) { if (isUnevaluatedContext() || !E.isUsable() || !Decl || !Decl->isConsteval() || isConstantEvaluated() || RebuildingImmediateInvocation || isImmediateFunctionContext()) return E; /// Opportunistically remove the callee from ReferencesToConsteval if we can. /// It's OK if this fails; we'll also remove this in /// HandleImmediateInvocations, but catching it here allows us to avoid /// walking the AST looking for it in simple cases. 
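// Illustrative sketch (not from this file): the C++20 deprecation tracked by
// CheckUnusedVolatileAssignment above. Only assignments whose result is
// actually used remain on the warning list. Names are invented.
#if 0
void demo() {
  volatile int v = 0;
  v = 1;            // discarded-value expression: removed from the list
  int x = (v = 2);  // result is used: warn_deprecated_simple_assign_volatile
  (void)x;
}
#endif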
if (auto *Call = dyn_cast(E.get()->IgnoreImplicit())) if (auto *DeclRef = dyn_cast(Call->getCallee()->IgnoreImplicit())) ExprEvalContexts.back().ReferenceToConsteval.erase(DeclRef); E = MaybeCreateExprWithCleanups(E); ConstantExpr *Res = ConstantExpr::Create( getASTContext(), E.get(), ConstantExpr::getStorageKind(Decl->getReturnType().getTypePtr(), getASTContext()), /*IsImmediateInvocation*/ true); /// Value-dependent constant expressions should not be immediately /// evaluated until they are instantiated. if (!Res->isValueDependent()) ExprEvalContexts.back().ImmediateInvocationCandidates.emplace_back(Res, 0); return Res; } static void EvaluateAndDiagnoseImmediateInvocation( Sema &SemaRef, Sema::ImmediateInvocationCandidate Candidate) { llvm::SmallVector Notes; Expr::EvalResult Eval; Eval.Diag = &Notes; ConstantExpr *CE = Candidate.getPointer(); bool Result = CE->EvaluateAsConstantExpr( Eval, SemaRef.getASTContext(), ConstantExprKind::ImmediateInvocation); if (!Result || !Notes.empty()) { Expr *InnerExpr = CE->getSubExpr()->IgnoreImplicit(); if (auto *FunctionalCast = dyn_cast(InnerExpr)) InnerExpr = FunctionalCast->getSubExpr(); FunctionDecl *FD = nullptr; if (auto *Call = dyn_cast(InnerExpr)) FD = cast(Call->getCalleeDecl()); else if (auto *Call = dyn_cast(InnerExpr)) FD = Call->getConstructor(); else llvm_unreachable("unhandled decl kind"); assert(FD->isConsteval()); SemaRef.Diag(CE->getBeginLoc(), diag::err_invalid_consteval_call) << FD; for (auto &Note : Notes) SemaRef.Diag(Note.first, Note.second); return; } CE->MoveIntoResult(Eval.Val, SemaRef.getASTContext()); } static void RemoveNestedImmediateInvocation( Sema &SemaRef, Sema::ExpressionEvaluationContextRecord &Rec, SmallVector::reverse_iterator It) { struct ComplexRemove : TreeTransform { using Base = TreeTransform; llvm::SmallPtrSetImpl &DRSet; SmallVector &IISet; SmallVector::reverse_iterator CurrentII; ComplexRemove(Sema &SemaRef, llvm::SmallPtrSetImpl &DR, SmallVector &II, SmallVector::reverse_iterator Current) : Base(SemaRef), DRSet(DR), IISet(II), CurrentII(Current) {} void RemoveImmediateInvocation(ConstantExpr* E) { auto It = std::find_if(CurrentII, IISet.rend(), [E](Sema::ImmediateInvocationCandidate Elem) { return Elem.getPointer() == E; }); assert(It != IISet.rend() && "ConstantExpr marked IsImmediateInvocation should " "be present"); It->setInt(1); // Mark as deleted } ExprResult TransformConstantExpr(ConstantExpr *E) { if (!E->isImmediateInvocation()) return Base::TransformConstantExpr(E); RemoveImmediateInvocation(E); return Base::TransformExpr(E->getSubExpr()); } /// Base::TransfromCXXOperatorCallExpr doesn't traverse the callee so /// we need to remove its DeclRefExpr from the DRSet. ExprResult TransformCXXOperatorCallExpr(CXXOperatorCallExpr *E) { DRSet.erase(cast(E->getCallee()->IgnoreImplicit())); return Base::TransformCXXOperatorCallExpr(E); } /// Base::TransformInitializer skip ConstantExpr so we need to visit them /// here. ExprResult TransformInitializer(Expr *Init, bool NotCopyInit) { if (!Init) return Init; /// ConstantExpr are the first layer of implicit node to be removed so if /// Init isn't a ConstantExpr, no ConstantExpr will be skipped. if (auto *CE = dyn_cast(Init)) if (CE->isImmediateInvocation()) RemoveImmediateInvocation(CE); return Base::TransformInitializer(Init, NotCopyInit); } ExprResult TransformDeclRefExpr(DeclRefExpr *E) { DRSet.erase(E); return E; } + ExprResult TransformLambdaExpr(LambdaExpr *E) { + // Do not rebuild lambdas to avoid creating a new type. 
+ // Lambdas have already been processed inside their eval context. + return E; + } bool AlwaysRebuild() { return false; } bool ReplacingOriginal() { return true; } bool AllowSkippingCXXConstructExpr() { bool Res = AllowSkippingFirstCXXConstructExpr; AllowSkippingFirstCXXConstructExpr = true; return Res; } bool AllowSkippingFirstCXXConstructExpr = true; } Transformer(SemaRef, Rec.ReferenceToConsteval, Rec.ImmediateInvocationCandidates, It); /// CXXConstructExpr with a single argument are getting skipped by /// TreeTransform in some situtation because they could be implicit. This /// can only occur for the top-level CXXConstructExpr because it is used /// nowhere in the expression being transformed therefore will not be rebuilt. /// Setting AllowSkippingFirstCXXConstructExpr to false will prevent from /// skipping the first CXXConstructExpr. if (isa(It->getPointer()->IgnoreImplicit())) Transformer.AllowSkippingFirstCXXConstructExpr = false; ExprResult Res = Transformer.TransformExpr(It->getPointer()->getSubExpr()); assert(Res.isUsable()); Res = SemaRef.MaybeCreateExprWithCleanups(Res); It->getPointer()->setSubExpr(Res.get()); } static void HandleImmediateInvocations(Sema &SemaRef, Sema::ExpressionEvaluationContextRecord &Rec) { if ((Rec.ImmediateInvocationCandidates.size() == 0 && Rec.ReferenceToConsteval.size() == 0) || SemaRef.RebuildingImmediateInvocation) return; /// When we have more then 1 ImmediateInvocationCandidates we need to check /// for nested ImmediateInvocationCandidates. when we have only 1 we only /// need to remove ReferenceToConsteval in the immediate invocation. if (Rec.ImmediateInvocationCandidates.size() > 1) { /// Prevent sema calls during the tree transform from adding pointers that /// are already in the sets. llvm::SaveAndRestore DisableIITracking( SemaRef.RebuildingImmediateInvocation, true); /// Prevent diagnostic during tree transfrom as they are duplicates Sema::TentativeAnalysisScope DisableDiag(SemaRef); for (auto It = Rec.ImmediateInvocationCandidates.rbegin(); It != Rec.ImmediateInvocationCandidates.rend(); It++) if (!It->getInt()) RemoveNestedImmediateInvocation(SemaRef, Rec, It); } else if (Rec.ImmediateInvocationCandidates.size() == 1 && Rec.ReferenceToConsteval.size()) { struct SimpleRemove : RecursiveASTVisitor { llvm::SmallPtrSetImpl &DRSet; SimpleRemove(llvm::SmallPtrSetImpl &S) : DRSet(S) {} bool VisitDeclRefExpr(DeclRefExpr *E) { DRSet.erase(E); return DRSet.size(); } } Visitor(Rec.ReferenceToConsteval); Visitor.TraverseStmt( Rec.ImmediateInvocationCandidates.front().getPointer()->getSubExpr()); } for (auto CE : Rec.ImmediateInvocationCandidates) if (!CE.getInt()) EvaluateAndDiagnoseImmediateInvocation(SemaRef, CE); for (auto DR : Rec.ReferenceToConsteval) { auto *FD = cast(DR->getDecl()); SemaRef.Diag(DR->getBeginLoc(), diag::err_invalid_consteval_take_address) << FD; SemaRef.Diag(FD->getLocation(), diag::note_declared_at); } } void Sema::PopExpressionEvaluationContext() { ExpressionEvaluationContextRecord& Rec = ExprEvalContexts.back(); unsigned NumTypos = Rec.NumTypos; if (!Rec.Lambdas.empty()) { using ExpressionKind = ExpressionEvaluationContextRecord::ExpressionKind; if (!getLangOpts().CPlusPlus20 && (Rec.ExprContext == ExpressionKind::EK_TemplateArgument || Rec.isUnevaluated() || (Rec.isConstantEvaluated() && !getLangOpts().CPlusPlus17))) { unsigned D; if (Rec.isUnevaluated()) { // C++11 [expr.prim.lambda]p2: // A lambda-expression shall not appear in an unevaluated operand // (Clause 5). 
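// Illustrative sketch (not from this file): the consteval rules that the
// immediate-invocation handling above enforces. Names are invented.
#if 0
consteval int square(int n) { return n * n; }

constexpr int ok = square(5);  // immediate invocation, folded to 25 here

// auto p = &square;           // err_invalid_consteval_take_address
// int f(int x) { return square(x); }  // err_invalid_consteval_call: the
                                       // argument is not a constant
#endif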
D = diag::err_lambda_unevaluated_operand; } else if (Rec.isConstantEvaluated() && !getLangOpts().CPlusPlus17) { // C++1y [expr.const]p2: // A conditional-expression e is a core constant expression unless the // evaluation of e, following the rules of the abstract machine, would // evaluate [...] a lambda-expression. D = diag::err_lambda_in_constant_expression; } else if (Rec.ExprContext == ExpressionKind::EK_TemplateArgument) { // C++17 [expr.prim.lamda]p2: // A lambda-expression shall not appear [...] in a template-argument. D = diag::err_lambda_in_invalid_context; } else llvm_unreachable("Couldn't infer lambda error message."); for (const auto *L : Rec.Lambdas) Diag(L->getBeginLoc(), D); } } WarnOnPendingNoDerefs(Rec); HandleImmediateInvocations(*this, Rec); // Warn on any volatile-qualified simple-assignments that are not discarded- // value expressions nor unevaluated operands (those cases get removed from // this list by CheckUnusedVolatileAssignment). for (auto *BO : Rec.VolatileAssignmentLHSs) Diag(BO->getBeginLoc(), diag::warn_deprecated_simple_assign_volatile) << BO->getType(); // When are coming out of an unevaluated context, clear out any // temporaries that we may have created as part of the evaluation of // the expression in that context: they aren't relevant because they // will never be constructed. if (Rec.isUnevaluated() || Rec.isConstantEvaluated()) { ExprCleanupObjects.erase(ExprCleanupObjects.begin() + Rec.NumCleanupObjects, ExprCleanupObjects.end()); Cleanup = Rec.ParentCleanup; CleanupVarDeclMarking(); std::swap(MaybeODRUseExprs, Rec.SavedMaybeODRUseExprs); // Otherwise, merge the contexts together. } else { Cleanup.mergeFrom(Rec.ParentCleanup); MaybeODRUseExprs.insert(Rec.SavedMaybeODRUseExprs.begin(), Rec.SavedMaybeODRUseExprs.end()); } // Pop the current expression evaluation context off the stack. ExprEvalContexts.pop_back(); // The global expression evaluation context record is never popped. ExprEvalContexts.back().NumTypos += NumTypos; } void Sema::DiscardCleanupsInEvaluationContext() { ExprCleanupObjects.erase( ExprCleanupObjects.begin() + ExprEvalContexts.back().NumCleanupObjects, ExprCleanupObjects.end()); Cleanup.reset(); MaybeODRUseExprs.clear(); } ExprResult Sema::HandleExprEvaluationContextForTypeof(Expr *E) { ExprResult Result = CheckPlaceholderExpr(E); if (Result.isInvalid()) return ExprError(); E = Result.get(); if (!E->getType()->isVariablyModifiedType()) return E; return TransformToPotentiallyEvaluated(E); } /// Are we in a context that is potentially constant evaluated per C++20 /// [expr.const]p12? static bool isPotentiallyConstantEvaluatedContext(Sema &SemaRef) { /// C++2a [expr.const]p12: // An expression or conversion is potentially constant evaluated if it is switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::ExpressionEvaluationContext::ConstantEvaluated: case Sema::ExpressionEvaluationContext::ImmediateFunctionContext: // -- a manifestly constant-evaluated expression, case Sema::ExpressionEvaluationContext::PotentiallyEvaluated: case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: case Sema::ExpressionEvaluationContext::DiscardedStatement: // -- a potentially-evaluated expression, case Sema::ExpressionEvaluationContext::UnevaluatedList: // -- an immediate subexpression of a braced-init-list, // -- [FIXME] an expression of the form & cast-expression that occurs // within a templated entity // -- a subexpression of one of the above that is not a subexpression of // a nested unevaluated operand. 
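// Illustrative sketch (not from this file): the lambda restrictions diagnosed
// in PopExpressionEvaluationContext above; each was lifted by a later
// standard, which is why the checks are gated on the language mode. Both
// lines below are valid in C++20.
#if 0
// Rejected before C++20 (err_lambda_unevaluated_operand):
static_assert(sizeof([] { return 1; }) > 0, "");
// Rejected before C++17 (err_lambda_in_constant_expression):
constexpr int n = [] { return 3; }();
#endif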
return true; case Sema::ExpressionEvaluationContext::Unevaluated: case Sema::ExpressionEvaluationContext::UnevaluatedAbstract: // Expressions in this context are never evaluated. return false; } llvm_unreachable("Invalid context"); } /// Return true if this function has a calling convention that requires mangling /// in the size of the parameter pack. static bool funcHasParameterSizeMangling(Sema &S, FunctionDecl *FD) { // These manglings don't do anything on non-Windows or non-x86 platforms, so // we don't need parameter type sizes. const llvm::Triple &TT = S.Context.getTargetInfo().getTriple(); if (!TT.isOSWindows() || !TT.isX86()) return false; // If this is C++ and this isn't an extern "C" function, parameters do not // need to be complete. In this case, C++ mangling will apply, which doesn't // use the size of the parameters. if (S.getLangOpts().CPlusPlus && !FD->isExternC()) return false; // Stdcall, fastcall, and vectorcall need this special treatment. CallingConv CC = FD->getType()->castAs()->getCallConv(); switch (CC) { case CC_X86StdCall: case CC_X86FastCall: case CC_X86VectorCall: return true; default: break; } return false; } /// Require that all of the parameter types of function be complete. Normally, /// parameter types are only required to be complete when a function is called /// or defined, but to mangle functions with certain calling conventions, the /// mangler needs to know the size of the parameter list. In this situation, /// MSVC doesn't emit an error or instantiate templates. Instead, MSVC mangles /// the function as _foo@0, i.e. zero bytes of parameters, which will usually /// result in a linker error. Clang doesn't implement this behavior, and instead /// attempts to error at compile time. static void CheckCompleteParameterTypesForMangler(Sema &S, FunctionDecl *FD, SourceLocation Loc) { class ParamIncompleteTypeDiagnoser : public Sema::TypeDiagnoser { FunctionDecl *FD; ParmVarDecl *Param; public: ParamIncompleteTypeDiagnoser(FunctionDecl *FD, ParmVarDecl *Param) : FD(FD), Param(Param) {} void diagnose(Sema &S, SourceLocation Loc, QualType T) override { CallingConv CC = FD->getType()->castAs()->getCallConv(); StringRef CCName; switch (CC) { case CC_X86StdCall: CCName = "stdcall"; break; case CC_X86FastCall: CCName = "fastcall"; break; case CC_X86VectorCall: CCName = "vectorcall"; break; default: llvm_unreachable("CC does not need mangling"); } S.Diag(Loc, diag::err_cconv_incomplete_param_type) << Param->getDeclName() << FD->getDeclName() << CCName; } }; for (ParmVarDecl *Param : FD->parameters()) { ParamIncompleteTypeDiagnoser Diagnoser(FD, Param); S.RequireCompleteType(Loc, Param->getType(), Diagnoser); } } namespace { enum class OdrUseContext { /// Declarations in this context are not odr-used. None, /// Declarations in this context are formally odr-used, but this is a /// dependent context. Dependent, /// Declarations in this context are odr-used but not actually used (yet). FormallyOdrUsed, /// Declarations in this context are used. Used }; } /// Are we within a context in which references to resolved functions or to /// variables result in odr-use? 
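/// For example (exposition only; hypothetical user code):
/// \code
///   void g();
///   void (*P)() = &g;      // potentially-evaluated: odr-uses g
///   auto S = sizeof(&g);   // unevaluated operand: g is not odr-used
/// \endcode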
static OdrUseContext isOdrUseContext(Sema &SemaRef) { OdrUseContext Result; switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::ExpressionEvaluationContext::Unevaluated: case Sema::ExpressionEvaluationContext::UnevaluatedList: case Sema::ExpressionEvaluationContext::UnevaluatedAbstract: return OdrUseContext::None; case Sema::ExpressionEvaluationContext::ConstantEvaluated: case Sema::ExpressionEvaluationContext::ImmediateFunctionContext: case Sema::ExpressionEvaluationContext::PotentiallyEvaluated: Result = OdrUseContext::Used; break; case Sema::ExpressionEvaluationContext::DiscardedStatement: Result = OdrUseContext::FormallyOdrUsed; break; case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: // A default argument formally results in odr-use, but doesn't actually // result in a use in any real sense until it itself is used. Result = OdrUseContext::FormallyOdrUsed; break; } if (SemaRef.CurContext->isDependentContext()) return OdrUseContext::Dependent; return Result; } static bool isImplicitlyDefinableConstexprFunction(FunctionDecl *Func) { if (!Func->isConstexpr()) return false; if (Func->isImplicitlyInstantiable() || !Func->isUserProvided()) return true; auto *CCD = dyn_cast(Func); return CCD && CCD->getInheritedConstructor(); } /// Mark a function referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3) void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func, bool MightBeOdrUse) { assert(Func && "No function?"); Func->setReferenced(); // Recursive functions aren't really used until they're used from some other // context. bool IsRecursiveCall = CurContext == Func; // C++11 [basic.def.odr]p3: // A function whose name appears as a potentially-evaluated expression is // odr-used if it is the unique lookup result or the selected member of a // set of overloaded functions [...]. // // We (incorrectly) mark overload resolution as an unevaluated context, so we // can just check that here. OdrUseContext OdrUse = MightBeOdrUse ? isOdrUseContext(*this) : OdrUseContext::None; if (IsRecursiveCall && OdrUse == OdrUseContext::Used) OdrUse = OdrUseContext::FormallyOdrUsed; // Trivial default constructors and destructors are never actually used. // FIXME: What about other special members? if (Func->isTrivial() && !Func->hasAttr() && OdrUse == OdrUseContext::Used) { if (auto *Constructor = dyn_cast(Func)) if (Constructor->isDefaultConstructor()) OdrUse = OdrUseContext::FormallyOdrUsed; if (isa(Func)) OdrUse = OdrUseContext::FormallyOdrUsed; } // C++20 [expr.const]p12: // A function [...] is needed for constant evaluation if it is [...] a // constexpr function that is named by an expression that is potentially // constant evaluated bool NeededForConstantEvaluation = isPotentiallyConstantEvaluatedContext(*this) && isImplicitlyDefinableConstexprFunction(Func); // Determine whether we require a function definition to exist, per // C++11 [temp.inst]p3: // Unless a function template specialization has been explicitly // instantiated or explicitly specialized, the function template // specialization is implicitly instantiated when the specialization is // referenced in a context that requires a function definition to exist. // C++20 [temp.inst]p7: // The existence of a definition of a [...] function is considered to // affect the semantics of the program if the [...] 
function is needed for // constant evaluation by an expression // C++20 [basic.def.odr]p10: // Every program shall contain exactly one definition of every non-inline // function or variable that is odr-used in that program outside of a // discarded statement // C++20 [special]p1: // The implementation will implicitly define [defaulted special members] // if they are odr-used or needed for constant evaluation. // // Note that we skip the implicit instantiation of templates that are only // used in unused default arguments or by recursive calls to themselves. // This is formally non-conforming, but seems reasonable in practice. bool NeedDefinition = !IsRecursiveCall && (OdrUse == OdrUseContext::Used || NeededForConstantEvaluation); // C++14 [temp.expl.spec]p6: // If a template [...] is explicitly specialized then that specialization // shall be declared before the first use of that specialization that would // cause an implicit instantiation to take place, in every translation unit // in which such a use occurs if (NeedDefinition && (Func->getTemplateSpecializationKind() != TSK_Undeclared || Func->getMemberSpecializationInfo())) checkSpecializationReachability(Loc, Func); if (getLangOpts().CUDA) CheckCUDACall(Loc, Func); if (getLangOpts().SYCLIsDevice) checkSYCLDeviceFunction(Loc, Func); // If we need a definition, try to create one. if (NeedDefinition && !Func->getBody()) { runWithSufficientStackSpace(Loc, [&] { if (CXXConstructorDecl *Constructor = dyn_cast(Func)) { Constructor = cast(Constructor->getFirstDecl()); if (Constructor->isDefaulted() && !Constructor->isDeleted()) { if (Constructor->isDefaultConstructor()) { if (Constructor->isTrivial() && !Constructor->hasAttr()) return; DefineImplicitDefaultConstructor(Loc, Constructor); } else if (Constructor->isCopyConstructor()) { DefineImplicitCopyConstructor(Loc, Constructor); } else if (Constructor->isMoveConstructor()) { DefineImplicitMoveConstructor(Loc, Constructor); } } else if (Constructor->getInheritedConstructor()) { DefineInheritingConstructor(Loc, Constructor); } } else if (CXXDestructorDecl *Destructor = dyn_cast(Func)) { Destructor = cast(Destructor->getFirstDecl()); if (Destructor->isDefaulted() && !Destructor->isDeleted()) { if (Destructor->isTrivial() && !Destructor->hasAttr()) return; DefineImplicitDestructor(Loc, Destructor); } if (Destructor->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, Destructor->getParent()); } else if (CXXMethodDecl *MethodDecl = dyn_cast(Func)) { if (MethodDecl->isOverloadedOperator() && MethodDecl->getOverloadedOperator() == OO_Equal) { MethodDecl = cast(MethodDecl->getFirstDecl()); if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) { if (MethodDecl->isCopyAssignmentOperator()) DefineImplicitCopyAssignment(Loc, MethodDecl); else if (MethodDecl->isMoveAssignmentOperator()) DefineImplicitMoveAssignment(Loc, MethodDecl); } } else if (isa(MethodDecl) && MethodDecl->getParent()->isLambda()) { CXXConversionDecl *Conversion = cast(MethodDecl->getFirstDecl()); if (Conversion->isLambdaToBlockPointerConversion()) DefineImplicitLambdaToBlockPointerConversion(Loc, Conversion); else DefineImplicitLambdaToFunctionPointerConversion(Loc, Conversion); } else if (MethodDecl->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, MethodDecl->getParent()); } if (Func->isDefaulted() && !Func->isDeleted()) { DefaultedComparisonKind DCK = getDefaultedComparisonKind(Func); if (DCK != DefaultedComparisonKind::None) DefineDefaultedComparison(Loc, Func, DCK); } // Implicit instantiation of function 
templates and member functions of // class templates. if (Func->isImplicitlyInstantiable()) { TemplateSpecializationKind TSK = Func->getTemplateSpecializationKindForInstantiation(); SourceLocation PointOfInstantiation = Func->getPointOfInstantiation(); bool FirstInstantiation = PointOfInstantiation.isInvalid(); if (FirstInstantiation) { PointOfInstantiation = Loc; if (auto *MSI = Func->getMemberSpecializationInfo()) MSI->setPointOfInstantiation(Loc); // FIXME: Notify listener. else Func->setTemplateSpecializationKind(TSK, PointOfInstantiation); } else if (TSK != TSK_ImplicitInstantiation) { // Use the point of use as the point of instantiation, instead of the // point of explicit instantiation (which we track as the actual point // of instantiation). This gives better backtraces in diagnostics. PointOfInstantiation = Loc; } if (FirstInstantiation || TSK != TSK_ImplicitInstantiation || Func->isConstexpr()) { if (isa(Func->getDeclContext()) && cast(Func->getDeclContext())->isLocalClass() && CodeSynthesisContexts.size()) PendingLocalImplicitInstantiations.push_back( std::make_pair(Func, PointOfInstantiation)); else if (Func->isConstexpr()) // Do not defer instantiations of constexpr functions, to avoid the // expression evaluator needing to call back into Sema if it sees a // call to such a function. InstantiateFunctionDefinition(PointOfInstantiation, Func); else { Func->setInstantiationIsPending(true); PendingInstantiations.push_back( std::make_pair(Func, PointOfInstantiation)); // Notify the consumer that a function was implicitly instantiated. Consumer.HandleCXXImplicitFunctionInstantiation(Func); } } } else { // Walk redefinitions, as some of them may be instantiable. for (auto i : Func->redecls()) { if (!i->isUsed(false) && i->isImplicitlyInstantiable()) MarkFunctionReferenced(Loc, i, MightBeOdrUse); } } }); } // C++14 [except.spec]p17: // An exception-specification is considered to be needed when: // - the function is odr-used or, if it appears in an unevaluated operand, // would be odr-used if the expression were potentially-evaluated; // // Note, we do this even if MightBeOdrUse is false. That indicates that the // function is a pure virtual function we're calling, and in that case the // function was selected by overload resolution and we need to resolve its // exception specification for a different reason. const FunctionProtoType *FPT = Func->getType()->getAs(); if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) ResolveExceptionSpec(Loc, FPT); // If this is the first "real" use, act on that. if (OdrUse == OdrUseContext::Used && !Func->isUsed(/*CheckUsedAttr=*/false)) { // Keep track of used but undefined functions. if (!Func->isDefined()) { if (mightHaveNonExternalLinkage(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (Func->getMostRecentDecl()->isInlined() && !LangOpts.GNUInline && !Func->getMostRecentDecl()->hasAttr()) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (isExternalWithNoLinkageType(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); } // Some x86 Windows calling conventions mangle the size of the parameter // pack into the name. Computing the size of the parameters requires the // parameter types to be complete. Check that now. if (funcHasParameterSizeMangling(*this, Func)) CheckCompleteParameterTypesForMangler(*this, Func, Loc); // In the MS C++ ABI, the compiler emits destructor variants where they are // used. 
If the destructor is used here but defined elsewhere, mark the // virtual base destructors referenced. If those virtual base destructors // are inline, this will ensure they are defined when emitting the complete // destructor variant. This checking may be redundant if the destructor is // provided later in this TU. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) { if (auto *Dtor = dyn_cast(Func)) { CXXRecordDecl *Parent = Dtor->getParent(); if (Parent->getNumVBases() > 0 && !Dtor->getBody()) CheckCompleteDestructorVariant(Loc, Dtor); } } Func->markUsed(Context); } } /// Directly mark a variable odr-used. Given a choice, prefer to use /// MarkVariableReferenced since it does additional checks and then /// calls MarkVarDeclODRUsed. /// If the variable must be captured: /// - if FunctionScopeIndexToStopAt is null, capture it in the CurContext /// - else capture it in the DeclContext that maps to the /// *FunctionScopeIndexToStopAt on the FunctionScopeInfo stack. static void MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef, const unsigned *const FunctionScopeIndexToStopAt = nullptr) { // Keep track of used but undefined variables. // FIXME: We shouldn't suppress this warning for static data members. if (Var->hasDefinition(SemaRef.Context) == VarDecl::DeclarationOnly && (!Var->isExternallyVisible() || Var->isInline() || SemaRef.isExternalWithNoLinkageType(Var)) && !(Var->isStaticDataMember() && Var->hasInit())) { SourceLocation &old = SemaRef.UndefinedButUsed[Var->getCanonicalDecl()]; if (old.isInvalid()) old = Loc; } QualType CaptureType, DeclRefType; if (SemaRef.LangOpts.OpenMP) SemaRef.tryCaptureOpenMPLambdas(Var); SemaRef.tryCaptureVariable(Var, Loc, Sema::TryCapture_Implicit, /*EllipsisLoc*/ SourceLocation(), /*BuildAndDiagnose*/ true, CaptureType, DeclRefType, FunctionScopeIndexToStopAt); if (SemaRef.LangOpts.CUDA && Var->hasGlobalStorage()) { auto *FD = dyn_cast_or_null(SemaRef.CurContext); auto VarTarget = SemaRef.IdentifyCUDATarget(Var); auto UserTarget = SemaRef.IdentifyCUDATarget(FD); if (VarTarget == Sema::CVT_Host && (UserTarget == Sema::CFT_Device || UserTarget == Sema::CFT_HostDevice || UserTarget == Sema::CFT_Global)) { // Diagnose ODR-use of host global variables in device functions. // Reference of device global variables in host functions is allowed // through shadow variables therefore it is not diagnosed. if (SemaRef.LangOpts.CUDAIsDevice) { SemaRef.targetDiag(Loc, diag::err_ref_bad_target) << /*host*/ 2 << /*variable*/ 1 << Var << UserTarget; SemaRef.targetDiag(Var->getLocation(), Var->getType().isConstQualified() ? diag::note_cuda_const_var_unpromoted : diag::note_cuda_host_var); } } else if (VarTarget == Sema::CVT_Device && (UserTarget == Sema::CFT_Host || UserTarget == Sema::CFT_HostDevice)) { // Record a CUDA/HIP device side variable if it is ODR-used // by host code. This is done conservatively, when the variable is // referenced in any of the following contexts: // - a non-function context // - a host function // - a host device function // This makes the ODR-use of the device side variable by host code to // be visible in the device compilation for the compiler to be able to // emit template variables instantiated by host code only and to // externalize the static device side variable ODR-used by host code. 
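      // Rough sketch (hypothetical CUDA user code) of the case recorded here:
      //
      //   __device__ int DeviceCounter;   // device-side global, no 'extern'
      //   void host_fn() {                // host function
      //     auto *P = &DeviceCounter;     // ODR-use from host code; recorded
      //   }                               // so the device compilation can
      //                                   // externalize the variable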
if (!Var->hasExternalStorage()) SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var); else if (SemaRef.LangOpts.GPURelocatableDeviceCode) SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var); } } Var->markUsed(SemaRef.Context); } void Sema::MarkCaptureUsedInEnclosingContext(VarDecl *Capture, SourceLocation Loc, unsigned CapturingScopeIndex) { MarkVarDeclODRUsed(Capture, Loc, *this, &CapturingScopeIndex); } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, ValueDecl *var) { DeclContext *VarDC = var->getDeclContext(); // If the parameter still belongs to the translation unit, then // we're actually just using one parameter in the declaration of // the next. if (isa(var) && isa(VarDC)) return; // For C code, don't diagnose about capture if we're not actually in code // right now; it's impossible to write a non-constant expression outside of // function context, so we'll get other (more useful) diagnostics later. // // For C++, things get a bit more nasty... it would be nice to suppress this // diagnostic for certain cases like using a local variable in an array bound // for a member of a local class, but the correct predicate is not obvious. if (!S.getLangOpts().CPlusPlus && !S.CurContext->isFunctionOrMethod()) return; unsigned ValueKind = isa(var) ? 1 : 0; unsigned ContextKind = 3; // unknown if (isa(VarDC) && cast(VarDC->getParent())->isLambda()) { ContextKind = 2; } else if (isa(VarDC)) { ContextKind = 0; } else if (isa(VarDC)) { ContextKind = 1; } S.Diag(loc, diag::err_reference_to_local_in_enclosing_context) << var << ValueKind << ContextKind << VarDC; S.Diag(var->getLocation(), diag::note_entity_declared_at) << var; // FIXME: Add additional diagnostic info about class etc. which prevents // capture. } static bool isVariableAlreadyCapturedInScopeInfo(CapturingScopeInfo *CSI, VarDecl *Var, bool &SubCapturesAreNested, QualType &CaptureType, QualType &DeclRefType) { // Check whether we've already captured it. if (CSI->CaptureMap.count(Var)) { // If we found a capture, any subcaptures are nested. SubCapturesAreNested = true; // Retrieve the capture type for this variable. CaptureType = CSI->getCapture(Var).getCaptureType(); // Compute the type of an expression that refers to this variable. DeclRefType = CaptureType.getNonReferenceType(); // Similarly to mutable captures in lambda, all the OpenMP captures by copy // are mutable in the sense that user can change their value - they are // private instances of the captured declarations. const Capture &Cap = CSI->getCapture(Var); if (Cap.isCopyCapture() && !(isa(CSI) && cast(CSI)->Mutable) && !(isa(CSI) && cast(CSI)->CapRegionKind == CR_OpenMP)) DeclRefType.addConst(); return true; } return false; } // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work. static DeclContext *getParentOfCapturingContextOrNull(DeclContext *DC, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { if (isa(DC) || isa(DC) || isLambdaCallOperator(DC)) return getLambdaAwareParentOfDeclContext(DC); else if (Var->hasLocalStorage()) { if (Diagnose) diagnoseUncapturableValueReference(S, Loc, Var); } return nullptr; } // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. 
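// Exposition-only sketch (hypothetical user code, assuming the Blocks
// extension is enabled) of captures rejected by these eligibility checks:
//
//   void user() {
//     union { int i; float f; };     // anonymous union: the variable is
//     auto L = [=] { return i; };    // unnamed, so a lambda cannot capture it
//
//     __block int B = 0;             // __block variable
//     auto M = [&] { return B; };    // lambdas cannot capture __block
//   }                                // variables either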
static bool isVariableCapturable(CapturingScopeInfo *CSI, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { bool IsBlock = isa(CSI); bool IsLambda = isa(CSI); // Lambdas are not allowed to capture unnamed variables // (e.g. anonymous unions). // FIXME: The C++11 rule don't actually state this explicitly, but I'm // assuming that's the intent. if (IsLambda && !Var->getDeclName()) { if (Diagnose) { S.Diag(Loc, diag::err_lambda_capture_anonymous_var); S.Diag(Var->getLocation(), diag::note_declared_at); } return false; } // Prohibit variably-modified types in blocks; they're difficult to deal with. if (Var->getType()->isVariablyModifiedType() && IsBlock) { if (Diagnose) { S.Diag(Loc, diag::err_ref_vm_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var; } return false; } // Prohibit structs with flexible array members too. // We cannot capture what is in the tail end of the struct. if (const RecordType *VTTy = Var->getType()->getAs()) { if (VTTy->getDecl()->hasFlexibleArrayMember()) { if (Diagnose) { if (IsBlock) S.Diag(Loc, diag::err_ref_flexarray_type); else S.Diag(Loc, diag::err_lambda_capture_flexarray_type) << Var; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var; } return false; } } const bool HasBlocksAttr = Var->hasAttr(); // Lambdas and captured statements are not allowed to capture __block // variables; they don't support the expected semantics. if (HasBlocksAttr && (IsLambda || isa(CSI))) { if (Diagnose) { S.Diag(Loc, diag::err_capture_block_variable) << Var << !IsLambda; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var; } return false; } // OpenCL v2.0 s6.12.5: Blocks cannot reference/capture other blocks if (S.getLangOpts().OpenCL && IsBlock && Var->getType()->isBlockPointerType()) { if (Diagnose) S.Diag(Loc, diag::err_opencl_block_ref_block); return false; } return true; } // Returns true if the capture by block was successful. static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool Nested, Sema &S, bool Invalid) { bool ByRef = false; // Blocks are not allowed to capture arrays, excepting OpenCL. // OpenCL v2.0 s1.12.5 (revision 40): arrays are captured by reference // (decayed to pointers). if (!Invalid && !S.getLangOpts().OpenCL && CaptureType->isArrayType()) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_ref_array_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var; Invalid = true; } else { return false; } } // Forbid the block-capture of autoreleasing variables. if (!Invalid && CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*block*/ 0; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var; Invalid = true; } else { return false; } } // Warn about implicitly autoreleasing indirect parameters captured by blocks. 
if (const auto *PT = CaptureType->getAs()) { QualType PointeeTy = PT->getPointeeType(); if (!Invalid && PointeeTy->getAs() && PointeeTy.getObjCLifetime() == Qualifiers::OCL_Autoreleasing && !S.Context.hasDirectOwnershipQualifier(PointeeTy)) { if (BuildAndDiagnose) { SourceLocation VarLoc = Var->getLocation(); S.Diag(Loc, diag::warn_block_capture_autoreleasing); S.Diag(VarLoc, diag::note_declare_parameter_strong); } } } const bool HasBlocksAttr = Var->hasAttr(); if (HasBlocksAttr || CaptureType->isReferenceType() || (S.getLangOpts().OpenMP && S.isOpenMPCapturedDecl(Var))) { // Block capture by reference does not change the capture or // declaration reference types. ByRef = true; } else { // Block capture by copy introduces 'const'. CaptureType = CaptureType.getNonReferenceType().withConst(); DeclRefType = CaptureType; } // Actually capture the variable. if (BuildAndDiagnose) BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(), CaptureType, Invalid); return !Invalid; } /// Capture the given variable in the captured region. static bool captureInCapturedRegion( CapturedRegionScopeInfo *RSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, Sema::TryCaptureKind Kind, bool IsTopScope, Sema &S, bool Invalid) { // By default, capture variables by reference. bool ByRef = true; if (IsTopScope && Kind != Sema::TryCapture_Implicit) { ByRef = (Kind == Sema::TryCapture_ExplicitByRef); } else if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) { // Using an LValue reference type is consistent with Lambdas (see below). if (S.isOpenMPCapturedDecl(Var)) { bool HasConst = DeclRefType.isConstQualified(); DeclRefType = DeclRefType.getUnqualifiedType(); // Don't lose diagnostics about assignments to const. if (HasConst) DeclRefType.addConst(); } // Do not capture firstprivates in tasks. if (S.isOpenMPPrivateDecl(Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel) != OMPC_unknown) return true; ByRef = S.isOpenMPCapturedByRef(Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel); } if (ByRef) CaptureType = S.Context.getLValueReferenceType(DeclRefType); else CaptureType = DeclRefType; // Actually capture the variable. if (BuildAndDiagnose) RSI->addCapture(Var, /*isBlock*/ false, ByRef, RefersToCapturedVariable, Loc, SourceLocation(), CaptureType, Invalid); return !Invalid; } /// Capture the given variable in the lambda. static bool captureInLambda(LambdaScopeInfo *LSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, const Sema::TryCaptureKind Kind, SourceLocation EllipsisLoc, const bool IsTopScope, Sema &S, bool Invalid) { // Determine whether we are capturing by reference or by value. bool ByRef = false; if (IsTopScope && Kind != Sema::TryCapture_Implicit) { ByRef = (Kind == Sema::TryCapture_ExplicitByRef); } else { ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref); } // Compute the type of the field that will capture this variable. if (ByRef) { // C++11 [expr.prim.lambda]p15: // An entity is captured by reference if it is implicitly or // explicitly captured but not captured by copy. It is // unspecified whether additional unnamed non-static data // members are declared in the closure type for entities // captured by reference. // // FIXME: It is not clear whether we want to build an lvalue reference // to the DeclRefType or to CaptureType.getNonReferenceType(). 
GCC appears // to do the former, while EDG does the latter. Core issue 1249 will // clarify, but for now we follow GCC because it's a more permissive and // easily defensible position. CaptureType = S.Context.getLValueReferenceType(DeclRefType); } else { // C++11 [expr.prim.lambda]p14: // For each entity captured by copy, an unnamed non-static // data member is declared in the closure type. The // declaration order of these members is unspecified. The type // of such a data member is the type of the corresponding // captured entity if the entity is not a reference to an // object, or the referenced type otherwise. [Note: If the // captured entity is a reference to a function, the // corresponding data member is also a reference to a // function. - end note ] if (const ReferenceType *RefType = CaptureType->getAs()){ if (!RefType->getPointeeType()->isFunctionType()) CaptureType = RefType->getPointeeType(); } // Forbid the lambda copy-capture of autoreleasing variables. if (!Invalid && CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*lambda*/ 1; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); Invalid = true; } else { return false; } } // Make sure that by-copy captures are of a complete and non-abstract type. if (!Invalid && BuildAndDiagnose) { if (!CaptureType->isDependentType() && S.RequireCompleteSizedType( Loc, CaptureType, diag::err_capture_of_incomplete_or_sizeless_type, Var->getDeclName())) Invalid = true; else if (S.RequireNonAbstractType(Loc, CaptureType, diag::err_capture_of_abstract_type)) Invalid = true; } } // Compute the type of a reference to this captured variable. if (ByRef) DeclRefType = CaptureType.getNonReferenceType(); else { // C++ [expr.prim.lambda]p5: // The closure type for a lambda-expression has a public inline // function call operator [...]. This function call operator is // declared const (9.3.1) if and only if the lambda-expression's // parameter-declaration-clause is not followed by mutable. DeclRefType = CaptureType.getNonReferenceType(); if (!LSI->Mutable && !CaptureType->isReferenceType()) DeclRefType.addConst(); } // Add the capture. if (BuildAndDiagnose) LSI->addCapture(Var, /*isBlock=*/false, ByRef, RefersToCapturedVariable, Loc, EllipsisLoc, CaptureType, Invalid); return !Invalid; } static bool canCaptureVariableByCopy(VarDecl *Var, const ASTContext &Context) { // Offer a Copy fix even if the type is dependent. if (Var->getType()->isDependentType()) return true; QualType T = Var->getType().getNonReferenceType(); if (T.isTriviallyCopyableType(Context)) return true; if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) { if (!(RD = RD->getDefinition())) return false; if (RD->hasSimpleCopyConstructor()) return true; if (RD->hasUserDeclaredCopyConstructor()) for (CXXConstructorDecl *Ctor : RD->ctors()) if (Ctor->isCopyConstructor()) return !Ctor->isDeleted(); } return false; } /// Create up to 4 fix-its for explicit reference and value capture of \p Var or /// default capture. Fixes may be omitted if they aren't allowed by the /// standard, for example we can't emit a default copy capture fix-it if we /// already explicitly copy capture capture another variable. static void buildLambdaCaptureFixit(Sema &Sema, LambdaScopeInfo *LSI, VarDecl *Var) { assert(LSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None); // Don't offer Capture by copy of default capture by copy fixes if Var is // known not to be copy constructible. 
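  // Exposition-only sketch (hypothetical user code) of the situation these
  // fix-its target:
  //
  //   void user() {
  //     int X = 0;
  //     auto L = []() { return X; };   // error: 'X' cannot be implicitly
  //                                    // captured (no capture-default)
  //     // suggested fix-its: [X], [&X], [=], [&]
  //   }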
bool ShouldOfferCopyFix = canCaptureVariableByCopy(Var, Sema.getASTContext()); SmallString<32> FixBuffer; StringRef Separator = LSI->NumExplicitCaptures > 0 ? ", " : ""; if (Var->getDeclName().isIdentifier() && !Var->getName().empty()) { SourceLocation VarInsertLoc = LSI->IntroducerRange.getEnd(); if (ShouldOfferCopyFix) { // Offer fixes to insert an explicit capture for the variable. // [] -> [VarName] // [OtherCapture] -> [OtherCapture, VarName] FixBuffer.assign({Separator, Var->getName()}); Sema.Diag(VarInsertLoc, diag::note_lambda_variable_capture_fixit) << Var << /*value*/ 0 << FixItHint::CreateInsertion(VarInsertLoc, FixBuffer); } // As above but capture by reference. FixBuffer.assign({Separator, "&", Var->getName()}); Sema.Diag(VarInsertLoc, diag::note_lambda_variable_capture_fixit) << Var << /*reference*/ 1 << FixItHint::CreateInsertion(VarInsertLoc, FixBuffer); } // Only try to offer default capture if there are no captures excluding this // and init captures. // [this]: OK. // [X = Y]: OK. // [&A, &B]: Don't offer. // [A, B]: Don't offer. if (llvm::any_of(LSI->Captures, [](Capture &C) { return !C.isThisCapture() && !C.isInitCapture(); })) return; // The default capture specifiers, '=' or '&', must appear first in the // capture body. SourceLocation DefaultInsertLoc = LSI->IntroducerRange.getBegin().getLocWithOffset(1); if (ShouldOfferCopyFix) { bool CanDefaultCopyCapture = true; // [=, *this] OK since c++17 // [=, this] OK since c++20 if (LSI->isCXXThisCaptured() && !Sema.getLangOpts().CPlusPlus20) CanDefaultCopyCapture = Sema.getLangOpts().CPlusPlus17 ? LSI->getCXXThisCapture().isCopyCapture() : false; // We can't use default capture by copy if any captures already specified // capture by copy. if (CanDefaultCopyCapture && llvm::none_of(LSI->Captures, [](Capture &C) { return !C.isThisCapture() && !C.isInitCapture() && C.isCopyCapture(); })) { FixBuffer.assign({"=", Separator}); Sema.Diag(DefaultInsertLoc, diag::note_lambda_default_capture_fixit) << /*value*/ 0 << FixItHint::CreateInsertion(DefaultInsertLoc, FixBuffer); } } // We can't use default capture by reference if any captures already specified // capture by reference. if (llvm::none_of(LSI->Captures, [](Capture &C) { return !C.isInitCapture() && C.isReferenceCapture() && !C.isThisCapture(); })) { FixBuffer.assign({"&", Separator}); Sema.Diag(DefaultInsertLoc, diag::note_lambda_default_capture_fixit) << /*reference*/ 1 << FixItHint::CreateInsertion(DefaultInsertLoc, FixBuffer); } } bool Sema::tryCaptureVariable( VarDecl *Var, SourceLocation ExprLoc, TryCaptureKind Kind, SourceLocation EllipsisLoc, bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const unsigned *const FunctionScopeIndexToStopAt) { // An init-capture is notionally from the context surrounding its // declaration, but its parent DC is the lambda class. DeclContext *VarDC = Var->getDeclContext(); if (Var->isInitCapture()) VarDC = VarDC->getParent(); DeclContext *DC = CurContext; const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt ? *FunctionScopeIndexToStopAt : FunctionScopes.size() - 1; // We need to sync up the Declaration Context with the // FunctionScopeIndexToStopAt if (FunctionScopeIndexToStopAt) { unsigned FSIndex = FunctionScopes.size() - 1; while (FSIndex != MaxFunctionScopesIndex) { DC = getLambdaAwareParentOfDeclContext(DC); --FSIndex; } } // If the variable is declared in the current context, there is no need to // capture it. 
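  // Exposition-only sketch (hypothetical user code):
  //
  //   auto L = [] {
  //     int Local = 42;
  //     return Local;       // declared inside the lambda itself, so
  //   };                    // VarDC == DC and no capture is required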
if (VarDC == DC) return true; // Capture global variables if it is required to use private copy of this // variable. bool IsGlobal = !Var->hasLocalStorage(); if (IsGlobal && !(LangOpts.OpenMP && isOpenMPCapturedDecl(Var, /*CheckScopeInfo=*/true, MaxFunctionScopesIndex))) return true; Var = Var->getCanonicalDecl(); // Walk up the stack to determine whether we can capture the variable, // performing the "simple" checks that don't depend on type. We stop when // we've either hit the declared scope of the variable or find an existing // capture of that variable. We start from the innermost capturing-entity // (the DC) and ensure that all intervening capturing-entities // (blocks/lambdas etc.) between the innermost capturer and the variable`s // declcontext can either capture the variable or have already captured // the variable. CaptureType = Var->getType(); DeclRefType = CaptureType.getNonReferenceType(); bool Nested = false; bool Explicit = (Kind != TryCapture_Implicit); unsigned FunctionScopesIndex = MaxFunctionScopesIndex; do { // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work. DeclContext *ParentDC = getParentOfCapturingContextOrNull(DC, Var, ExprLoc, BuildAndDiagnose, *this); // We need to check for the parent *first* because, if we *have* // private-captured a global variable, we need to recursively capture it in // intermediate blocks, lambdas, etc. if (!ParentDC) { if (IsGlobal) { FunctionScopesIndex = MaxFunctionScopesIndex - 1; break; } return true; } FunctionScopeInfo *FSI = FunctionScopes[FunctionScopesIndex]; CapturingScopeInfo *CSI = cast(FSI); // Check whether we've already captured it. if (isVariableAlreadyCapturedInScopeInfo(CSI, Var, Nested, CaptureType, DeclRefType)) { CSI->getCapture(Var).markUsed(BuildAndDiagnose); break; } // If we are instantiating a generic lambda call operator body, // we do not want to capture new variables. What was captured // during either a lambdas transformation or initial parsing // should be used. if (isGenericLambdaCallOperatorSpecialization(DC)) { if (BuildAndDiagnose) { LambdaScopeInfo *LSI = cast(CSI); if (LSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None) { Diag(ExprLoc, diag::err_lambda_impcap) << Var; Diag(Var->getLocation(), diag::note_previous_decl) << Var; Diag(LSI->Lambda->getBeginLoc(), diag::note_lambda_decl); buildLambdaCaptureFixit(*this, LSI, Var); } else diagnoseUncapturableValueReference(*this, ExprLoc, Var); } return true; } // Try to capture variable-length arrays types. if (Var->getType()->isVariablyModifiedType()) { // We're going to walk down into the type and look for VLA // expressions. QualType QTy = Var->getType(); if (ParmVarDecl *PVD = dyn_cast_or_null(Var)) QTy = PVD->getOriginalType(); captureVariablyModifiedType(Context, QTy, CSI); } if (getLangOpts().OpenMP) { if (auto *RSI = dyn_cast(CSI)) { // OpenMP private variables should not be captured in outer scope, so // just break here. Similarly, global variables that are captured in a // target region should not be captured outside the scope of the region. if (RSI->CapRegionKind == CR_OpenMP) { OpenMPClauseKind IsOpenMPPrivateDecl = isOpenMPPrivateDecl( Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel); // If the variable is private (i.e. not captured) and has variably // modified type, we still need to capture the type for correct // codegen in all regions, associated with the construct. Currently, // it is captured in the innermost captured region only. 
if (IsOpenMPPrivateDecl != OMPC_unknown && Var->getType()->isVariablyModifiedType()) { QualType QTy = Var->getType(); if (ParmVarDecl *PVD = dyn_cast_or_null(Var)) QTy = PVD->getOriginalType(); for (int I = 1, E = getNumberOfConstructScopes(RSI->OpenMPLevel); I < E; ++I) { auto *OuterRSI = cast( FunctionScopes[FunctionScopesIndex - I]); assert(RSI->OpenMPLevel == OuterRSI->OpenMPLevel && "Wrong number of captured regions associated with the " "OpenMP construct."); captureVariablyModifiedType(Context, QTy, OuterRSI); } } bool IsTargetCap = IsOpenMPPrivateDecl != OMPC_private && isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel); // Do not capture global if it is not privatized in outer regions. bool IsGlobalCap = IsGlobal && isOpenMPGlobalCapturedDecl(Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel); // When we detect target captures we are looking from inside the // target region, therefore we need to propagate the capture from the // enclosing region. Therefore, the capture is not initially nested. if (IsTargetCap) adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel); if (IsTargetCap || IsOpenMPPrivateDecl == OMPC_private || (IsGlobal && !IsGlobalCap)) { Nested = !IsTargetCap; bool HasConst = DeclRefType.isConstQualified(); DeclRefType = DeclRefType.getUnqualifiedType(); // Don't lose diagnostics about assignments to const. if (HasConst) DeclRefType.addConst(); CaptureType = Context.getLValueReferenceType(DeclRefType); break; } } } } if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None && !Explicit) { // No capture-default, and this is not an explicit capture // so cannot capture this variable. if (BuildAndDiagnose) { Diag(ExprLoc, diag::err_lambda_impcap) << Var; Diag(Var->getLocation(), diag::note_previous_decl) << Var; auto *LSI = cast(CSI); if (LSI->Lambda) { Diag(LSI->Lambda->getBeginLoc(), diag::note_lambda_decl); buildLambdaCaptureFixit(*this, LSI, Var); } // FIXME: If we error out because an outer lambda can not implicitly // capture a variable that an inner lambda explicitly captures, we // should have the inner lambda do the explicit capture - because // it makes for cleaner diagnostics later. This would purely be done // so that the diagnostic does not misleadingly claim that a variable // can not be captured by a lambda implicitly even though it is captured // explicitly. Suggestion: // - create const bool VariableCaptureWasInitiallyExplicit = Explicit // at the function head // - cache the StartingDeclContext - this must be a lambda // - captureInLambda in the innermost lambda the variable. } return true; } FunctionScopesIndex--; DC = ParentDC; Explicit = false; } while (!VarDC->Equals(DC)); // Walk back down the scope stack, (e.g. from outer lambda to inner lambda) // computing the type of the capture at each step, checking type-specific // requirements, and adding captures if requested. // If the variable had already been captured previously, we start capturing // at the lambda nested within that one. bool Invalid = false; for (unsigned I = ++FunctionScopesIndex, N = MaxFunctionScopesIndex + 1; I != N; ++I) { CapturingScopeInfo *CSI = cast(FunctionScopes[I]); // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. 
if (!Invalid) Invalid = !isVariableCapturable(CSI, Var, ExprLoc, BuildAndDiagnose, *this); // After encountering an error, if we're actually supposed to capture, keep // capturing in nested contexts to suppress any follow-on diagnostics. if (Invalid && !BuildAndDiagnose) return true; if (BlockScopeInfo *BSI = dyn_cast(CSI)) { Invalid = !captureInBlock(BSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, *this, Invalid); Nested = true; } else if (CapturedRegionScopeInfo *RSI = dyn_cast(CSI)) { Invalid = !captureInCapturedRegion( RSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, Kind, /*IsTopScope*/ I == N - 1, *this, Invalid); Nested = true; } else { LambdaScopeInfo *LSI = cast(CSI); Invalid = !captureInLambda(LSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, Kind, EllipsisLoc, /*IsTopScope*/ I == N - 1, *this, Invalid); Nested = true; } if (Invalid && !BuildAndDiagnose) return true; } return Invalid; } bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation Loc, TryCaptureKind Kind, SourceLocation EllipsisLoc) { QualType CaptureType; QualType DeclRefType; return tryCaptureVariable(Var, Loc, Kind, EllipsisLoc, /*BuildAndDiagnose=*/true, CaptureType, DeclRefType, nullptr); } bool Sema::NeedToCaptureVariable(VarDecl *Var, SourceLocation Loc) { QualType CaptureType; QualType DeclRefType; return !tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(), /*BuildAndDiagnose=*/false, CaptureType, DeclRefType, nullptr); } QualType Sema::getCapturedDeclRefType(VarDecl *Var, SourceLocation Loc) { QualType CaptureType; QualType DeclRefType; // Determine whether we can capture this variable. if (tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(), /*BuildAndDiagnose=*/false, CaptureType, DeclRefType, nullptr)) return QualType(); return DeclRefType; } namespace { // Helper to copy the template arguments from a DeclRefExpr or MemberExpr. // The produced TemplateArgumentListInfo* points to data stored within this // object, so should only be used in contexts where the pointer will not be // used after the CopiedTemplateArgs object is destroyed. class CopiedTemplateArgs { bool HasArgs; TemplateArgumentListInfo TemplateArgStorage; public: template CopiedTemplateArgs(RefExpr *E) : HasArgs(E->hasExplicitTemplateArgs()) { if (HasArgs) E->copyTemplateArgumentsInto(TemplateArgStorage); } operator TemplateArgumentListInfo*() #ifdef __has_cpp_attribute #if __has_cpp_attribute(clang::lifetimebound) [[clang::lifetimebound]] #endif #endif { return HasArgs ? &TemplateArgStorage : nullptr; } }; } /// Walk the set of potential results of an expression and mark them all as /// non-odr-uses if they satisfy the side-conditions of the NonOdrUseReason. /// /// \return A new expression if we found any potential results, ExprEmpty() if /// not, and ExprError() if we diagnosed an error. static ExprResult rebuildPotentialResultsAsNonOdrUsed(Sema &S, Expr *E, NonOdrUseReason NOUR) { // Per C++11 [basic.def.odr], a variable is odr-used "unless it is // an object that satisfies the requirements for appearing in a // constant expression (5.19) and the lvalue-to-rvalue conversion (4.1) // is immediately applied." This function handles the lvalue-to-rvalue // conversion part. // // If we encounter a node that claims to be an odr-use but shouldn't be, we // transform it into the relevant kind of non-odr-use node and rebuild the // tree of nodes leading to it. 
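  // Exposition-only sketch (hypothetical user code) of a reference that is
  // rewritten as a non-odr-use:
  //
  //   constexpr int Limit = 32;
  //   auto L = [] { return Limit; };   // reads Limit's constant value via the
  //                                    // lvalue-to-rvalue conversion, so the
  //                                    // reference is not an odr-use and no
  //                                    // capture or definition is required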
// // This is a mini-TreeTransform that only transforms a restricted subset of // nodes (and only certain operands of them). // Rebuild a subexpression. auto Rebuild = [&](Expr *Sub) { return rebuildPotentialResultsAsNonOdrUsed(S, Sub, NOUR); }; // Check whether a potential result satisfies the requirements of NOUR. auto IsPotentialResultOdrUsed = [&](NamedDecl *D) { // Any entity other than a VarDecl is always odr-used whenever it's named // in a potentially-evaluated expression. auto *VD = dyn_cast(D); if (!VD) return true; // C++2a [basic.def.odr]p4: // A variable x whose name appears as a potentially-evalauted expression // e is odr-used by e unless // -- x is a reference that is usable in constant expressions, or // -- x is a variable of non-reference type that is usable in constant // expressions and has no mutable subobjects, and e is an element of // the set of potential results of an expression of // non-volatile-qualified non-class type to which the lvalue-to-rvalue // conversion is applied, or // -- x is a variable of non-reference type, and e is an element of the // set of potential results of a discarded-value expression to which // the lvalue-to-rvalue conversion is not applied // // We check the first bullet and the "potentially-evaluated" condition in // BuildDeclRefExpr. We check the type requirements in the second bullet // in CheckLValueToRValueConversionOperand below. switch (NOUR) { case NOUR_None: case NOUR_Unevaluated: llvm_unreachable("unexpected non-odr-use-reason"); case NOUR_Constant: // Constant references were handled when they were built. if (VD->getType()->isReferenceType()) return true; if (auto *RD = VD->getType()->getAsCXXRecordDecl()) if (RD->hasMutableFields()) return true; if (!VD->isUsableInConstantExpressions(S.Context)) return true; break; case NOUR_Discarded: if (VD->getType()->isReferenceType()) return true; break; } return false; }; // Mark that this expression does not constitute an odr-use. auto MarkNotOdrUsed = [&] { S.MaybeODRUseExprs.remove(E); if (LambdaScopeInfo *LSI = S.getCurLambda()) LSI->markVariableExprAsNonODRUsed(E); }; // C++2a [basic.def.odr]p2: // The set of potential results of an expression e is defined as follows: switch (E->getStmtClass()) { // -- If e is an id-expression, ... case Expr::DeclRefExprClass: { auto *DRE = cast(E); if (DRE->isNonOdrUse() || IsPotentialResultOdrUsed(DRE->getDecl())) break; // Rebuild as a non-odr-use DeclRefExpr. MarkNotOdrUsed(); return DeclRefExpr::Create( S.Context, DRE->getQualifierLoc(), DRE->getTemplateKeywordLoc(), DRE->getDecl(), DRE->refersToEnclosingVariableOrCapture(), DRE->getNameInfo(), DRE->getType(), DRE->getValueKind(), DRE->getFoundDecl(), CopiedTemplateArgs(DRE), NOUR); } case Expr::FunctionParmPackExprClass: { auto *FPPE = cast(E); // If any of the declarations in the pack is odr-used, then the expression // as a whole constitutes an odr-use. for (VarDecl *D : *FPPE) if (IsPotentialResultOdrUsed(D)) return ExprEmpty(); // FIXME: Rebuild as a non-odr-use FunctionParmPackExpr? In practice, // nothing cares about whether we marked this as an odr-use, but it might // be useful for non-compiler tools. MarkNotOdrUsed(); break; } // -- If e is a subscripting operation with an array operand... case Expr::ArraySubscriptExprClass: { auto *ASE = cast(E); Expr *OldBase = ASE->getBase()->IgnoreImplicit(); if (!OldBase->getType()->isArrayType()) break; ExprResult Base = Rebuild(OldBase); if (!Base.isUsable()) return Base; Expr *LHS = ASE->getBase() == ASE->getLHS() ? 
Base.get() : ASE->getLHS(); Expr *RHS = ASE->getBase() == ASE->getRHS() ? Base.get() : ASE->getRHS(); SourceLocation LBracketLoc = ASE->getBeginLoc(); // FIXME: Not stored. return S.ActOnArraySubscriptExpr(nullptr, LHS, LBracketLoc, RHS, ASE->getRBracketLoc()); } case Expr::MemberExprClass: { auto *ME = cast(E); // -- If e is a class member access expression [...] naming a non-static // data member... if (isa(ME->getMemberDecl())) { ExprResult Base = Rebuild(ME->getBase()); if (!Base.isUsable()) return Base; return MemberExpr::Create( S.Context, Base.get(), ME->isArrow(), ME->getOperatorLoc(), ME->getQualifierLoc(), ME->getTemplateKeywordLoc(), ME->getMemberDecl(), ME->getFoundDecl(), ME->getMemberNameInfo(), CopiedTemplateArgs(ME), ME->getType(), ME->getValueKind(), ME->getObjectKind(), ME->isNonOdrUse()); } if (ME->getMemberDecl()->isCXXInstanceMember()) break; // -- If e is a class member access expression naming a static data member, // ... if (ME->isNonOdrUse() || IsPotentialResultOdrUsed(ME->getMemberDecl())) break; // Rebuild as a non-odr-use MemberExpr. MarkNotOdrUsed(); return MemberExpr::Create( S.Context, ME->getBase(), ME->isArrow(), ME->getOperatorLoc(), ME->getQualifierLoc(), ME->getTemplateKeywordLoc(), ME->getMemberDecl(), ME->getFoundDecl(), ME->getMemberNameInfo(), CopiedTemplateArgs(ME), ME->getType(), ME->getValueKind(), ME->getObjectKind(), NOUR); } case Expr::BinaryOperatorClass: { auto *BO = cast(E); Expr *LHS = BO->getLHS(); Expr *RHS = BO->getRHS(); // -- If e is a pointer-to-member expression of the form e1 .* e2 ... if (BO->getOpcode() == BO_PtrMemD) { ExprResult Sub = Rebuild(LHS); if (!Sub.isUsable()) return Sub; LHS = Sub.get(); // -- If e is a comma expression, ... } else if (BO->getOpcode() == BO_Comma) { ExprResult Sub = Rebuild(RHS); if (!Sub.isUsable()) return Sub; RHS = Sub.get(); } else { break; } return S.BuildBinOp(nullptr, BO->getOperatorLoc(), BO->getOpcode(), LHS, RHS); } // -- If e has the form (e1)... case Expr::ParenExprClass: { auto *PE = cast(E); ExprResult Sub = Rebuild(PE->getSubExpr()); if (!Sub.isUsable()) return Sub; return S.ActOnParenExpr(PE->getLParen(), PE->getRParen(), Sub.get()); } // -- If e is a glvalue conditional expression, ... // We don't apply this to a binary conditional operator. FIXME: Should we? case Expr::ConditionalOperatorClass: { auto *CO = cast(E); ExprResult LHS = Rebuild(CO->getLHS()); if (LHS.isInvalid()) return ExprError(); ExprResult RHS = Rebuild(CO->getRHS()); if (RHS.isInvalid()) return ExprError(); if (!LHS.isUsable() && !RHS.isUsable()) return ExprEmpty(); if (!LHS.isUsable()) LHS = CO->getLHS(); if (!RHS.isUsable()) RHS = CO->getRHS(); return S.ActOnConditionalOp(CO->getQuestionLoc(), CO->getColonLoc(), CO->getCond(), LHS.get(), RHS.get()); } // [Clang extension] // -- If e has the form __extension__ e1... case Expr::UnaryOperatorClass: { auto *UO = cast(E); if (UO->getOpcode() != UO_Extension) break; ExprResult Sub = Rebuild(UO->getSubExpr()); if (!Sub.isUsable()) return Sub; return S.BuildUnaryOp(nullptr, UO->getOperatorLoc(), UO_Extension, Sub.get()); } // [Clang extension] // -- If e has the form _Generic(...), the set of potential results is the // union of the sets of potential results of the associated expressions. 
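  // Rough example (hypothetical user code, Clang's _Generic extension):
  //
  //   const int N = 1;
  //   int R = _Generic(0, int : N, default : 0);  // N appears in an
  //                                               // associated expression, so
  //                                               // it is still a potential
  //                                               // result and can be a
  //                                               // non-odr-use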
case Expr::GenericSelectionExprClass: { auto *GSE = cast(E); SmallVector AssocExprs; bool AnyChanged = false; for (Expr *OrigAssocExpr : GSE->getAssocExprs()) { ExprResult AssocExpr = Rebuild(OrigAssocExpr); if (AssocExpr.isInvalid()) return ExprError(); if (AssocExpr.isUsable()) { AssocExprs.push_back(AssocExpr.get()); AnyChanged = true; } else { AssocExprs.push_back(OrigAssocExpr); } } return AnyChanged ? S.CreateGenericSelectionExpr( GSE->getGenericLoc(), GSE->getDefaultLoc(), GSE->getRParenLoc(), GSE->getControllingExpr(), GSE->getAssocTypeSourceInfos(), AssocExprs) : ExprEmpty(); } // [Clang extension] // -- If e has the form __builtin_choose_expr(...), the set of potential // results is the union of the sets of potential results of the // second and third subexpressions. case Expr::ChooseExprClass: { auto *CE = cast(E); ExprResult LHS = Rebuild(CE->getLHS()); if (LHS.isInvalid()) return ExprError(); ExprResult RHS = Rebuild(CE->getLHS()); if (RHS.isInvalid()) return ExprError(); if (!LHS.get() && !RHS.get()) return ExprEmpty(); if (!LHS.isUsable()) LHS = CE->getLHS(); if (!RHS.isUsable()) RHS = CE->getRHS(); return S.ActOnChooseExpr(CE->getBuiltinLoc(), CE->getCond(), LHS.get(), RHS.get(), CE->getRParenLoc()); } // Step through non-syntactic nodes. case Expr::ConstantExprClass: { auto *CE = cast(E); ExprResult Sub = Rebuild(CE->getSubExpr()); if (!Sub.isUsable()) return Sub; return ConstantExpr::Create(S.Context, Sub.get()); } // We could mostly rely on the recursive rebuilding to rebuild implicit // casts, but not at the top level, so rebuild them here. case Expr::ImplicitCastExprClass: { auto *ICE = cast(E); // Only step through the narrow set of cast kinds we expect to encounter. // Anything else suggests we've left the region in which potential results // can be found. switch (ICE->getCastKind()) { case CK_NoOp: case CK_DerivedToBase: case CK_UncheckedDerivedToBase: { ExprResult Sub = Rebuild(ICE->getSubExpr()); if (!Sub.isUsable()) return Sub; CXXCastPath Path(ICE->path()); return S.ImpCastExprToType(Sub.get(), ICE->getType(), ICE->getCastKind(), ICE->getValueKind(), &Path); } default: break; } break; } default: break; } // Can't traverse through this node. Nothing to do. return ExprEmpty(); } ExprResult Sema::CheckLValueToRValueConversionOperand(Expr *E) { // Check whether the operand is or contains an object of non-trivial C union // type. if (E->getType().isVolatileQualified() && (E->getType().hasNonTrivialToPrimitiveDestructCUnion() || E->getType().hasNonTrivialToPrimitiveCopyCUnion())) checkNonTrivialCUnion(E->getType(), E->getExprLoc(), Sema::NTCUC_LValueToRValueVolatile, NTCUK_Destruct|NTCUK_Copy); // C++2a [basic.def.odr]p4: // [...] an expression of non-volatile-qualified non-class type to which // the lvalue-to-rvalue conversion is applied [...] if (E->getType().isVolatileQualified() || E->getType()->getAs()) return E; ExprResult Result = rebuildPotentialResultsAsNonOdrUsed(*this, E, NOUR_Constant); if (Result.isInvalid()) return ExprError(); return Result.get() ? Result : E; } ExprResult Sema::ActOnConstantExpression(ExprResult Res) { Res = CorrectDelayedTyposInExpr(Res); if (!Res.isUsable()) return Res; // If a constant-expression is a reference to a variable where we delay // deciding whether it is an odr-use, just assume we will apply the // lvalue-to-rvalue conversion. In the one case where this doesn't happen // (a non-type template argument), we have special handling anyway. 
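  // Exposition-only sketch (hypothetical user code):
  //
  //   const int Size = 8;
  //   int Buffer[Size];    // constant-expression context: assume the
  //                        // lvalue-to-rvalue conversion applies, so the
  //                        // reference to Size need not be an odr-use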
return CheckLValueToRValueConversionOperand(Res.get()); } void Sema::CleanupVarDeclMarking() { // Iterate through a local copy in case MarkVarDeclODRUsed makes a recursive // call. MaybeODRUseExprSet LocalMaybeODRUseExprs; std::swap(LocalMaybeODRUseExprs, MaybeODRUseExprs); for (Expr *E : LocalMaybeODRUseExprs) { if (auto *DRE = dyn_cast(E)) { MarkVarDeclODRUsed(cast(DRE->getDecl()), DRE->getLocation(), *this); } else if (auto *ME = dyn_cast(E)) { MarkVarDeclODRUsed(cast(ME->getMemberDecl()), ME->getMemberLoc(), *this); } else if (auto *FP = dyn_cast(E)) { for (VarDecl *VD : *FP) MarkVarDeclODRUsed(VD, FP->getParameterPackLocation(), *this); } else { llvm_unreachable("Unexpected expression"); } } assert(MaybeODRUseExprs.empty() && "MarkVarDeclODRUsed failed to cleanup MaybeODRUseExprs?"); } static void DoMarkVarDeclReferenced( Sema &SemaRef, SourceLocation Loc, VarDecl *Var, Expr *E, llvm::DenseMap &RefsMinusAssignments) { assert((!E || isa(E) || isa(E) || isa(E)) && "Invalid Expr argument to DoMarkVarDeclReferenced"); Var->setReferenced(); if (Var->isInvalidDecl()) return; auto *MSI = Var->getMemberSpecializationInfo(); TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind() : Var->getTemplateSpecializationKind(); OdrUseContext OdrUse = isOdrUseContext(SemaRef); bool UsableInConstantExpr = Var->mightBeUsableInConstantExpressions(SemaRef.Context); if (Var->isLocalVarDeclOrParm() && !Var->hasExternalStorage()) { RefsMinusAssignments.insert({Var, 0}).first->getSecond()++; } // C++20 [expr.const]p12: // A variable [...] is needed for constant evaluation if it is [...] a // variable whose name appears as a potentially constant evaluated // expression that is either a contexpr variable or is of non-volatile // const-qualified integral type or of reference type bool NeededForConstantEvaluation = isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr; bool NeedDefinition = OdrUse == OdrUseContext::Used || NeededForConstantEvaluation; assert(!isa(Var) && "Can't instantiate a partial template specialization."); // If this might be a member specialization of a static data member, check // the specialization is visible. We already did the checks for variable // template specializations when we created them. if (NeedDefinition && TSK != TSK_Undeclared && !isa(Var)) SemaRef.checkSpecializationVisibility(Loc, Var); // Perform implicit instantiation of static data members, static data member // templates of class templates, and variable template specializations. Delay // instantiations of variable templates, except for those that could be used // in a constant expression. if (NeedDefinition && isTemplateInstantiation(TSK)) { // Per C++17 [temp.explicit]p10, we may instantiate despite an explicit // instantiation declaration if a variable is usable in a constant // expression (among other cases). bool TryInstantiating = TSK == TSK_ImplicitInstantiation || (TSK == TSK_ExplicitInstantiationDeclaration && UsableInConstantExpr); if (TryInstantiating) { SourceLocation PointOfInstantiation = MSI ? MSI->getPointOfInstantiation() : Var->getPointOfInstantiation(); bool FirstInstantiation = PointOfInstantiation.isInvalid(); if (FirstInstantiation) { PointOfInstantiation = Loc; if (MSI) MSI->setPointOfInstantiation(PointOfInstantiation); // FIXME: Notify listener. else Var->setTemplateSpecializationKind(TSK, PointOfInstantiation); } if (UsableInConstantExpr) { // Do not defer instantiations of variables that could be used in a // constant expression. 
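  // Rough example (hypothetical user code) of why such instantiations are not
  // deferred:
  //
  //   template <typename T> struct S {
  //     static constexpr int value = sizeof(T);
  //   };
  //   static_assert(S<int>::value == sizeof(int), "");  // the definition is
  //                                                     // needed during
  //                                                     // constant evaluation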
SemaRef.runWithSufficientStackSpace(PointOfInstantiation, [&] { SemaRef.InstantiateVariableDefinition(PointOfInstantiation, Var); }); // Re-set the member to trigger a recomputation of the dependence bits // for the expression. if (auto *DRE = dyn_cast_or_null(E)) DRE->setDecl(DRE->getDecl()); else if (auto *ME = dyn_cast_or_null(E)) ME->setMemberDecl(ME->getMemberDecl()); } else if (FirstInstantiation || isa(Var)) { // FIXME: For a specialization of a variable template, we don't // distinguish between "declaration and type implicitly instantiated" // and "implicit instantiation of definition requested", so we have // no direct way to avoid enqueueing the pending instantiation // multiple times. SemaRef.PendingInstantiations .push_back(std::make_pair(Var, PointOfInstantiation)); } } } // C++2a [basic.def.odr]p4: // A variable x whose name appears as a potentially-evaluated expression e // is odr-used by e unless // -- x is a reference that is usable in constant expressions // -- x is a variable of non-reference type that is usable in constant // expressions and has no mutable subobjects [FIXME], and e is an // element of the set of potential results of an expression of // non-volatile-qualified non-class type to which the lvalue-to-rvalue // conversion is applied // -- x is a variable of non-reference type, and e is an element of the set // of potential results of a discarded-value expression to which the // lvalue-to-rvalue conversion is not applied [FIXME] // // We check the first part of the second bullet here, and // Sema::CheckLValueToRValueConversionOperand deals with the second part. // FIXME: To get the third bullet right, we need to delay this even for // variables that are not usable in constant expressions. // If we already know this isn't an odr-use, there's nothing more to do. if (DeclRefExpr *DRE = dyn_cast_or_null(E)) if (DRE->isNonOdrUse()) return; if (MemberExpr *ME = dyn_cast_or_null(E)) if (ME->isNonOdrUse()) return; switch (OdrUse) { case OdrUseContext::None: assert((!E || isa(E)) && "missing non-odr-use marking for unevaluated decl ref"); break; case OdrUseContext::FormallyOdrUsed: // FIXME: Ignoring formal odr-uses results in incorrect lambda capture // behavior. break; case OdrUseContext::Used: // If we might later find that this expression isn't actually an odr-use, // delay the marking. if (E && Var->isUsableInConstantExpressions(SemaRef.Context)) SemaRef.MaybeODRUseExprs.insert(E); else MarkVarDeclODRUsed(Var, Loc, SemaRef); break; case OdrUseContext::Dependent: // If this is a dependent context, we don't need to mark variables as // odr-used, but we may still need to track them for lambda capture. // FIXME: Do we also need to do this inside dependent typeid expressions // (which are modeled as unevaluated at this point)? const bool RefersToEnclosingScope = (SemaRef.CurContext != Var->getDeclContext() && Var->getDeclContext()->isFunctionOrMethod() && Var->hasLocalStorage()); if (RefersToEnclosingScope) { LambdaScopeInfo *const LSI = SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true); if (LSI && (!LSI->CallOperator || !LSI->CallOperator->Encloses(Var->getDeclContext()))) { // If a variable could potentially be odr-used, defer marking it so // until we finish analyzing the full expression for any // lvalue-to-rvalue // or discarded value conversions that would obviate odr-use. 
// Add it to the list of potential captures that will be analyzed // later (ActOnFinishFullExpr) for eventual capture and odr-use marking // unless the variable is a reference that was initialized by a constant // expression (this will never need to be captured or odr-used). // // FIXME: We can simplify this a lot after implementing P0588R1. assert(E && "Capture variable should be used in an expression."); if (!Var->getType()->isReferenceType() || !Var->isUsableInConstantExpressions(SemaRef.Context)) LSI->addPotentialCapture(E->IgnoreParens()); } } break; } } /// Mark a variable referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3). Note that this should not be /// used directly for normal expressions referring to VarDecl. void Sema::MarkVariableReferenced(SourceLocation Loc, VarDecl *Var) { DoMarkVarDeclReferenced(*this, Loc, Var, nullptr, RefsMinusAssignments); } static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc, Decl *D, Expr *E, bool MightBeOdrUse, llvm::DenseMap &RefsMinusAssignments) { if (SemaRef.isInOpenMPDeclareTargetContext()) SemaRef.checkDeclIsAllowedInOpenMPTarget(E, D); if (VarDecl *Var = dyn_cast(D)) { DoMarkVarDeclReferenced(SemaRef, Loc, Var, E, RefsMinusAssignments); return; } SemaRef.MarkAnyDeclReferenced(Loc, D, MightBeOdrUse); // If this is a call to a method via a cast, also mark the method in the // derived class used in case codegen can devirtualize the call. const MemberExpr *ME = dyn_cast(E); if (!ME) return; CXXMethodDecl *MD = dyn_cast(ME->getMemberDecl()); if (!MD) return; // Only attempt to devirtualize if this is truly a virtual call. bool IsVirtualCall = MD->isVirtual() && ME->performsVirtualDispatch(SemaRef.getLangOpts()); if (!IsVirtualCall) return; // If it's possible to devirtualize the call, mark the called function // referenced. CXXMethodDecl *DM = MD->getDevirtualizedMethod( ME->getBase(), SemaRef.getLangOpts().AppleKext); if (DM) SemaRef.MarkAnyDeclReferenced(Loc, DM, MightBeOdrUse); } /// Perform reference-marking and odr-use handling for a DeclRefExpr. /// /// Note, this may change the dependence of the DeclRefExpr, and so needs to be /// handled with care if the DeclRefExpr is not newly-created. void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) { // TODO: update this with DR# once a defect report is filed. // C++11 defect. The address of a pure member should not be an ODR use, even // if it's a qualified reference. bool OdrUse = true; if (const CXXMethodDecl *Method = dyn_cast(E->getDecl())) if (Method->isVirtual() && !Method->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) OdrUse = false; if (auto *FD = dyn_cast(E->getDecl())) if (!isUnevaluatedContext() && !isConstantEvaluated() && FD->isConsteval() && !RebuildingImmediateInvocation) ExprEvalContexts.back().ReferenceToConsteval.insert(E); MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse, RefsMinusAssignments); } /// Perform reference-marking and odr-use handling for a MemberExpr. void Sema::MarkMemberReferenced(MemberExpr *E) { // C++11 [basic.def.odr]p2: // A non-overloaded function whose name appears as a potentially-evaluated // expression or a member of a set of candidate functions, if selected by // overload resolution when referred to from a potentially-evaluated // expression, is odr-used, unless it is a pure virtual function and its // name is not explicitly qualified. 
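  // For example (illustrative aside, not part of this patch):
  //
  //   struct Base { virtual void f() = 0; };
  //   void call(Base &B) {
  //     B.f();        // virtual dispatch: the pure virtual Base::f is not
  //                   // odr-used, so no definition of it is required.
  //     B.Base::f();  // explicitly qualified: Base::f is odr-used and must
  //                   // be defined somewhere in the program.
  //   }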
bool MightBeOdrUse = true; if (E->performsVirtualDispatch(getLangOpts())) { if (CXXMethodDecl *Method = dyn_cast(E->getMemberDecl())) if (Method->isPure()) MightBeOdrUse = false; } SourceLocation Loc = E->getMemberLoc().isValid() ? E->getMemberLoc() : E->getBeginLoc(); MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse, RefsMinusAssignments); } /// Perform reference-marking and odr-use handling for a FunctionParmPackExpr. void Sema::MarkFunctionParmPackReferenced(FunctionParmPackExpr *E) { for (VarDecl *VD : *E) MarkExprReferenced(*this, E->getParameterPackLocation(), VD, E, true, RefsMinusAssignments); } /// Perform marking for a reference to an arbitrary declaration. It /// marks the declaration referenced, and performs odr-use checking for /// functions and variables. This method should not be used when building a /// normal expression which refers to a variable. void Sema::MarkAnyDeclReferenced(SourceLocation Loc, Decl *D, bool MightBeOdrUse) { if (MightBeOdrUse) { if (auto *VD = dyn_cast(D)) { MarkVariableReferenced(Loc, VD); return; } } if (auto *FD = dyn_cast(D)) { MarkFunctionReferenced(Loc, FD, MightBeOdrUse); return; } D->setReferenced(); } namespace { // Mark all of the declarations used by a type as referenced. // FIXME: Not fully implemented yet! We need to have a better understanding // of when we're entering a context we should not recurse into. // FIXME: This is and EvaluatedExprMarker are more-or-less equivalent to // TreeTransforms rebuilding the type in a new context. Rather than // duplicating the TreeTransform logic, we should consider reusing it here. // Currently that causes problems when rebuilding LambdaExprs. class MarkReferencedDecls : public RecursiveASTVisitor { Sema &S; SourceLocation Loc; public: typedef RecursiveASTVisitor Inherited; MarkReferencedDecls(Sema &S, SourceLocation Loc) : S(S), Loc(Loc) { } bool TraverseTemplateArgument(const TemplateArgument &Arg); }; } bool MarkReferencedDecls::TraverseTemplateArgument( const TemplateArgument &Arg) { { // A non-type template argument is a constant-evaluated context. EnterExpressionEvaluationContext Evaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); if (Arg.getKind() == TemplateArgument::Declaration) { if (Decl *D = Arg.getAsDecl()) S.MarkAnyDeclReferenced(Loc, D, true); } else if (Arg.getKind() == TemplateArgument::Expression) { S.MarkDeclarationsReferencedInExpr(Arg.getAsExpr(), false); } } return Inherited::TraverseTemplateArgument(Arg); } void Sema::MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T) { MarkReferencedDecls Marker(*this, Loc); Marker.TraverseType(T); } namespace { /// Helper class that marks all of the declarations referenced by /// potentially-evaluated subexpressions as "referenced". class EvaluatedExprMarker : public UsedDeclVisitor { public: typedef UsedDeclVisitor Inherited; bool SkipLocalVariables; ArrayRef StopAt; EvaluatedExprMarker(Sema &S, bool SkipLocalVariables, ArrayRef StopAt) : Inherited(S), SkipLocalVariables(SkipLocalVariables), StopAt(StopAt) {} void visitUsedDecl(SourceLocation Loc, Decl *D) { S.MarkFunctionReferenced(Loc, cast(D)); } void Visit(Expr *E) { if (llvm::is_contained(StopAt, E)) return; Inherited::Visit(E); } void VisitConstantExpr(ConstantExpr *E) { // Don't mark declarations within a ConstantExpression, as this expression // will be evaluated and folded to a value. } void VisitDeclRefExpr(DeclRefExpr *E) { // If we were asked not to visit local variables, don't. 
if (SkipLocalVariables) { if (VarDecl *VD = dyn_cast(E->getDecl())) if (VD->hasLocalStorage()) return; } // FIXME: This can trigger the instantiation of the initializer of a // variable, which can cause the expression to become value-dependent // or error-dependent. Do we need to propagate the new dependence bits? S.MarkDeclRefReferenced(E); } void VisitMemberExpr(MemberExpr *E) { S.MarkMemberReferenced(E); Visit(E->getBase()); } }; } // namespace /// Mark any declarations that appear within this expression or any /// potentially-evaluated subexpressions as "referenced". /// /// \param SkipLocalVariables If true, don't mark local variables as /// 'referenced'. /// \param StopAt Subexpressions that we shouldn't recurse into. void Sema::MarkDeclarationsReferencedInExpr(Expr *E, bool SkipLocalVariables, ArrayRef StopAt) { EvaluatedExprMarker(*this, SkipLocalVariables, StopAt).Visit(E); } /// Emit a diagnostic when statements are reachable. /// FIXME: check for reachability even in expressions for which we don't build a /// CFG (eg, in the initializer of a global or in a constant expression). /// For example, /// namespace { auto *p = new double[3][false ? (1, 2) : 3]; } bool Sema::DiagIfReachable(SourceLocation Loc, ArrayRef Stmts, const PartialDiagnostic &PD) { if (!Stmts.empty() && getCurFunctionOrMethodDecl()) { if (!FunctionScopes.empty()) FunctionScopes.back()->PossiblyUnreachableDiags.push_back( sema::PossiblyUnreachableDiag(PD, Loc, Stmts)); return true; } // The initializer of a constexpr variable or of the first declaration of a // static data member is not syntactically a constant evaluated constant, // but nonetheless is always required to be a constant expression, so we // can skip diagnosing. // FIXME: Using the mangling context here is a hack. if (auto *VD = dyn_cast_or_null( ExprEvalContexts.back().ManglingContextDecl)) { if (VD->isConstexpr() || (VD->isStaticDataMember() && VD->isFirstDecl() && !VD->isInline())) return false; // FIXME: For any other kind of variable, we should build a CFG for its // initializer and check whether the context in question is reachable. } Diag(Loc, PD); return true; } /// Emit a diagnostic that describes an effect on the run-time behavior /// of the program being compiled. /// /// This routine emits the given diagnostic when the code currently being /// type-checked is "potentially evaluated", meaning that there is a /// possibility that the code will actually be executable. Code in sizeof() /// expressions, code used only during overload resolution, etc., are not /// potentially evaluated. This routine will suppress such diagnostics or, /// in the absolutely nutty case of potentially potentially evaluated /// expressions (C++ typeid), queue the diagnostic to potentially emit it /// later. /// /// This routine should be used for all diagnostics that describe the run-time /// behavior of a program, such as passing a non-POD value through an ellipsis. /// Failure to do so will likely result in spurious diagnostics or failures /// during overload resolution or within sizeof/alignof/typeof/typeid. 
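// Illustrative aside (not part of this patch) on the routine documented above.
// Passing a non-trivial class type through an ellipsis is a run-time-behavior
// diagnostic, so it is only emitted when the call could actually execute:
//
//   struct NonTrivial { NonTrivial(); ~NonTrivial(); };
//   void vararg(int, ...);
//   void use(NonTrivial X) {
//     vararg(0, X);                   // diagnosed: potentially evaluated
//     (void)sizeof(vararg(0, X), 0);  // not diagnosed: unevaluated operand
//   }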
bool Sema::DiagRuntimeBehavior(SourceLocation Loc, ArrayRef Stmts, const PartialDiagnostic &PD) { if (ExprEvalContexts.back().isDiscardedStatementContext()) return false; switch (ExprEvalContexts.back().Context) { case ExpressionEvaluationContext::Unevaluated: case ExpressionEvaluationContext::UnevaluatedList: case ExpressionEvaluationContext::UnevaluatedAbstract: case ExpressionEvaluationContext::DiscardedStatement: // The argument will never be evaluated, so don't complain. break; case ExpressionEvaluationContext::ConstantEvaluated: case ExpressionEvaluationContext::ImmediateFunctionContext: // Relevant diagnostics should be produced by constant evaluation. break; case ExpressionEvaluationContext::PotentiallyEvaluated: case ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: return DiagIfReachable(Loc, Stmts, PD); } return false; } bool Sema::DiagRuntimeBehavior(SourceLocation Loc, const Stmt *Statement, const PartialDiagnostic &PD) { return DiagRuntimeBehavior( Loc, Statement ? llvm::makeArrayRef(Statement) : llvm::None, PD); } bool Sema::CheckCallReturnType(QualType ReturnType, SourceLocation Loc, CallExpr *CE, FunctionDecl *FD) { if (ReturnType->isVoidType() || !ReturnType->isIncompleteType()) return false; // If we're inside a decltype's expression, don't check for a valid return // type or construct temporaries until we know whether this is the last call. if (ExprEvalContexts.back().ExprContext == ExpressionEvaluationContextRecord::EK_Decltype) { ExprEvalContexts.back().DelayedDecltypeCalls.push_back(CE); return false; } class CallReturnIncompleteDiagnoser : public TypeDiagnoser { FunctionDecl *FD; CallExpr *CE; public: CallReturnIncompleteDiagnoser(FunctionDecl *FD, CallExpr *CE) : FD(FD), CE(CE) { } void diagnose(Sema &S, SourceLocation Loc, QualType T) override { if (!FD) { S.Diag(Loc, diag::err_call_incomplete_return) << T << CE->getSourceRange(); return; } S.Diag(Loc, diag::err_call_function_incomplete_return) << CE->getSourceRange() << FD << T; S.Diag(FD->getLocation(), diag::note_entity_declared_at) << FD->getDeclName(); } } Diagnoser(FD, CE); if (RequireCompleteType(Loc, ReturnType, Diagnoser)) return true; return false; } // Diagnose the s/=/==/ and s/\|=/!=/ typos. Note that adding parentheses // will prevent this condition from triggering, which is what we want. void Sema::DiagnoseAssignmentAsCondition(Expr *E) { SourceLocation Loc; unsigned diagnostic = diag::warn_condition_is_assignment; bool IsOrAssign = false; if (BinaryOperator *Op = dyn_cast(E)) { if (Op->getOpcode() != BO_Assign && Op->getOpcode() != BO_OrAssign) return; IsOrAssign = Op->getOpcode() == BO_OrAssign; // Greylist some idioms by putting them into a warning subcategory. if (ObjCMessageExpr *ME = dyn_cast(Op->getRHS()->IgnoreParenCasts())) { Selector Sel = ME->getSelector(); // self = [ init...] if (isSelfExpr(Op->getLHS()) && ME->getMethodFamily() == OMF_init) diagnostic = diag::warn_condition_is_idiomatic_assignment; // = [ nextObject] else if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "nextObject") diagnostic = diag::warn_condition_is_idiomatic_assignment; } Loc = Op->getOperatorLoc(); } else if (CXXOperatorCallExpr *Op = dyn_cast(E)) { if (Op->getOperator() != OO_Equal && Op->getOperator() != OO_PipeEqual) return; IsOrAssign = Op->getOperator() == OO_PipeEqual; Loc = Op->getOperatorLoc(); } else if (PseudoObjectExpr *POE = dyn_cast(E)) return DiagnoseAssignmentAsCondition(POE->getSyntacticForm()); else { // Not an assignment. 
return; } Diag(Loc, diagnostic) << E->getSourceRange(); SourceLocation Open = E->getBeginLoc(); SourceLocation Close = getLocForEndOfToken(E->getSourceRange().getEnd()); Diag(Loc, diag::note_condition_assign_silence) << FixItHint::CreateInsertion(Open, "(") << FixItHint::CreateInsertion(Close, ")"); if (IsOrAssign) Diag(Loc, diag::note_condition_or_assign_to_comparison) << FixItHint::CreateReplacement(Loc, "!="); else Diag(Loc, diag::note_condition_assign_to_comparison) << FixItHint::CreateReplacement(Loc, "=="); } /// Redundant parentheses over an equality comparison can indicate /// that the user intended an assignment used as condition. void Sema::DiagnoseEqualityWithExtraParens(ParenExpr *ParenE) { // Don't warn if the parens came from a macro. SourceLocation parenLoc = ParenE->getBeginLoc(); if (parenLoc.isInvalid() || parenLoc.isMacroID()) return; // Don't warn for dependent expressions. if (ParenE->isTypeDependent()) return; Expr *E = ParenE->IgnoreParens(); if (BinaryOperator *opE = dyn_cast(E)) if (opE->getOpcode() == BO_EQ && opE->getLHS()->IgnoreParenImpCasts()->isModifiableLvalue(Context) == Expr::MLV_Valid) { SourceLocation Loc = opE->getOperatorLoc(); Diag(Loc, diag::warn_equality_with_extra_parens) << E->getSourceRange(); SourceRange ParenERange = ParenE->getSourceRange(); Diag(Loc, diag::note_equality_comparison_silence) << FixItHint::CreateRemoval(ParenERange.getBegin()) << FixItHint::CreateRemoval(ParenERange.getEnd()); Diag(Loc, diag::note_equality_comparison_to_assign) << FixItHint::CreateReplacement(Loc, "="); } } ExprResult Sema::CheckBooleanCondition(SourceLocation Loc, Expr *E, bool IsConstexpr) { DiagnoseAssignmentAsCondition(E); if (ParenExpr *parenE = dyn_cast(E)) DiagnoseEqualityWithExtraParens(parenE); ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); if (!E->isTypeDependent()) { if (getLangOpts().CPlusPlus) return CheckCXXBooleanCondition(E, IsConstexpr); // C++ 6.4p4 ExprResult ERes = DefaultFunctionArrayLvalueConversion(E); if (ERes.isInvalid()) return ExprError(); E = ERes.get(); QualType T = E->getType(); if (!T->isScalarType()) { // C99 6.8.4.1p1 Diag(Loc, diag::err_typecheck_statement_requires_scalar) << T << E->getSourceRange(); return ExprError(); } CheckBoolLikeConversion(E, Loc); } return E; } Sema::ConditionResult Sema::ActOnCondition(Scope *S, SourceLocation Loc, Expr *SubExpr, ConditionKind CK, bool MissingOK) { // MissingOK indicates whether having no condition expression is valid // (for loop) or invalid (e.g. while loop). if (!SubExpr) return MissingOK ? ConditionResult() : ConditionError(); ExprResult Cond; switch (CK) { case ConditionKind::Boolean: Cond = CheckBooleanCondition(Loc, SubExpr); break; case ConditionKind::ConstexprIf: Cond = CheckBooleanCondition(Loc, SubExpr, true); break; case ConditionKind::Switch: Cond = CheckSwitchCondition(Loc, SubExpr); break; } if (Cond.isInvalid()) { Cond = CreateRecoveryExpr(SubExpr->getBeginLoc(), SubExpr->getEndLoc(), {SubExpr}, PreferredConditionType(CK)); if (!Cond.get()) return ConditionError(); } // FIXME: FullExprArg doesn't have an invalid bit, so check nullness instead. FullExprArg FullExpr = MakeFullExpr(Cond.get(), Loc); if (!FullExpr.get()) return ConditionError(); return ConditionResult(*this, nullptr, FullExpr, CK == ConditionKind::ConstexprIf); } namespace { /// A visitor for rebuilding a call to an __unknown_any expression /// to have an appropriate type. 
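// Illustrative aside (not part of this patch), before the unknown-any
// machinery below: the two condition diagnostics implemented above.
//
//   void test(int X, int Y) {
//     if (X = Y) {}     // assignment used as condition; fix-its suggest
//                       // adding parentheses or using '=='.
//     if ((X == Y)) {}  // equality comparison with extra parentheses;
//                       // fix-its suggest removing them or using '='.
//   }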
struct RebuildUnknownAnyFunction : StmtVisitor { Sema &S; RebuildUnknownAnyFunction(Sema &S) : S(S) {} ExprResult VisitStmt(Stmt *S) { llvm_unreachable("unexpected statement!"); } ExprResult VisitExpr(Expr *E) { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_call) << E->getSourceRange(); return ExprError(); } /// Rebuild an expression which simply semantically wraps another /// expression which it shares the type and value kind of. template ExprResult rebuildSugarExpr(T *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(SubExpr->getType()); E->setValueKind(SubExpr->getValueKind()); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult VisitParenExpr(ParenExpr *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryExtension(UnaryOperator *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryAddrOf(UnaryOperator *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(S.Context.getPointerType(SubExpr->getType())); assert(E->isPRValue()); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult resolveDecl(Expr *E, ValueDecl *VD) { if (!isa(VD)) return VisitExpr(E); E->setType(VD->getType()); assert(E->isPRValue()); if (S.getLangOpts().CPlusPlus && !(isa(VD) && cast(VD)->isInstance())) E->setValueKind(VK_LValue); return E; } ExprResult VisitMemberExpr(MemberExpr *E) { return resolveDecl(E, E->getMemberDecl()); } ExprResult VisitDeclRefExpr(DeclRefExpr *E) { return resolveDecl(E, E->getDecl()); } }; } /// Given a function expression of unknown-any type, try to rebuild it /// to have a function type. static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *FunctionExpr) { ExprResult Result = RebuildUnknownAnyFunction(S).Visit(FunctionExpr); if (Result.isInvalid()) return ExprError(); return S.DefaultFunctionArrayConversion(Result.get()); } namespace { /// A visitor for rebuilding an expression of type __unknown_anytype /// into one which resolves the type directly on the referring /// expression. Strict preservation of the original source /// structure is not a goal. struct RebuildUnknownAnyExpr : StmtVisitor { Sema &S; /// The current destination type. QualType DestType; RebuildUnknownAnyExpr(Sema &S, QualType CastType) : S(S), DestType(CastType) {} ExprResult VisitStmt(Stmt *S) { llvm_unreachable("unexpected statement!"); } ExprResult VisitExpr(Expr *E) { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr) << E->getSourceRange(); return ExprError(); } ExprResult VisitCallExpr(CallExpr *E); ExprResult VisitObjCMessageExpr(ObjCMessageExpr *E); /// Rebuild an expression which simply semantically wraps another /// expression which it shares the type and value kind of. 
template ExprResult rebuildSugarExpr(T *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(SubExpr->getType()); E->setValueKind(SubExpr->getValueKind()); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult VisitParenExpr(ParenExpr *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryExtension(UnaryOperator *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryAddrOf(UnaryOperator *E) { const PointerType *Ptr = DestType->getAs(); if (!Ptr) { S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof) << E->getSourceRange(); return ExprError(); } if (isa(E->getSubExpr())) { S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof_call) << E->getSourceRange(); return ExprError(); } assert(E->isPRValue()); assert(E->getObjectKind() == OK_Ordinary); E->setType(DestType); // Build the sub-expression as if it were an object of the pointee type. DestType = Ptr->getPointeeType(); ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); E->setSubExpr(SubResult.get()); return E; } ExprResult VisitImplicitCastExpr(ImplicitCastExpr *E); ExprResult resolveDecl(Expr *E, ValueDecl *VD); ExprResult VisitMemberExpr(MemberExpr *E) { return resolveDecl(E, E->getMemberDecl()); } ExprResult VisitDeclRefExpr(DeclRefExpr *E) { return resolveDecl(E, E->getDecl()); } }; } /// Rebuilds a call expression which yielded __unknown_anytype. ExprResult RebuildUnknownAnyExpr::VisitCallExpr(CallExpr *E) { Expr *CalleeExpr = E->getCallee(); enum FnKind { FK_MemberFunction, FK_FunctionPointer, FK_BlockPointer }; FnKind Kind; QualType CalleeType = CalleeExpr->getType(); if (CalleeType == S.Context.BoundMemberTy) { assert(isa(E) || isa(E)); Kind = FK_MemberFunction; CalleeType = Expr::findBoundMemberType(CalleeExpr); } else if (const PointerType *Ptr = CalleeType->getAs()) { CalleeType = Ptr->getPointeeType(); Kind = FK_FunctionPointer; } else { CalleeType = CalleeType->castAs()->getPointeeType(); Kind = FK_BlockPointer; } const FunctionType *FnType = CalleeType->castAs(); // Verify that this is a legal result type of a function. if (DestType->isArrayType() || DestType->isFunctionType()) { unsigned diagID = diag::err_func_returning_array_function; if (Kind == FK_BlockPointer) diagID = diag::err_block_returning_array_function; S.Diag(E->getExprLoc(), diagID) << DestType->isFunctionType() << DestType; return ExprError(); } // Otherwise, go ahead and set DestType as the call's result. E->setType(DestType.getNonLValueExprType(S.Context)); E->setValueKind(Expr::getValueKindForType(DestType)); assert(E->getObjectKind() == OK_Ordinary); // Rebuild the function type, replacing the result type with DestType. const FunctionProtoType *Proto = dyn_cast(FnType); if (Proto) { // __unknown_anytype(...) is a special case used by the debugger when // it has no idea what a function's signature is. // // We want to build this call essentially under the K&R // unprototyped rules, but making a FunctionNoProtoType in C++ // would foul up all sorts of assumptions. However, we cannot // simply pass all arguments as variadic arguments, nor can we // portably just call the function under a non-variadic type; see // the comment on IR-gen's TargetInfo::isNoProtoCallVariadic. // However, it turns out that in practice it is generally safe to // call a function declared as "A foo(B,C,D);" under the prototype // "A foo(B,C,D,...);". 
The only known exception is with the // Windows ABI, where any variadic function is implicitly cdecl // regardless of its normal CC. Therefore we change the parameter // types to match the types of the arguments. // // This is a hack, but it is far superior to moving the // corresponding target-specific code from IR-gen to Sema/AST. ArrayRef ParamTypes = Proto->getParamTypes(); SmallVector ArgTypes; if (ParamTypes.empty() && Proto->isVariadic()) { // the special case ArgTypes.reserve(E->getNumArgs()); for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { ArgTypes.push_back(S.Context.getReferenceQualifiedType(E->getArg(i))); } ParamTypes = ArgTypes; } DestType = S.Context.getFunctionType(DestType, ParamTypes, Proto->getExtProtoInfo()); } else { DestType = S.Context.getFunctionNoProtoType(DestType, FnType->getExtInfo()); } // Rebuild the appropriate pointer-to-function type. switch (Kind) { case FK_MemberFunction: // Nothing to do. break; case FK_FunctionPointer: DestType = S.Context.getPointerType(DestType); break; case FK_BlockPointer: DestType = S.Context.getBlockPointerType(DestType); break; } // Finally, we can recurse. ExprResult CalleeResult = Visit(CalleeExpr); if (!CalleeResult.isUsable()) return ExprError(); E->setCallee(CalleeResult.get()); // Bind a temporary if necessary. return S.MaybeBindToTemporary(E); } ExprResult RebuildUnknownAnyExpr::VisitObjCMessageExpr(ObjCMessageExpr *E) { // Verify that this is a legal result type of a call. if (DestType->isArrayType() || DestType->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_func_returning_array_function) << DestType->isFunctionType() << DestType; return ExprError(); } // Rewrite the method result type if available. if (ObjCMethodDecl *Method = E->getMethodDecl()) { assert(Method->getReturnType() == S.Context.UnknownAnyTy); Method->setReturnType(DestType); } // Change the type of the message. E->setType(DestType.getNonReferenceType()); E->setValueKind(Expr::getValueKindForType(DestType)); return S.MaybeBindToTemporary(E); } ExprResult RebuildUnknownAnyExpr::VisitImplicitCastExpr(ImplicitCastExpr *E) { // The only case we should ever see here is a function-to-pointer decay. if (E->getCastKind() == CK_FunctionToPointerDecay) { assert(E->isPRValue()); assert(E->getObjectKind() == OK_Ordinary); E->setType(DestType); // Rebuild the sub-expression as the pointee (function) type. DestType = DestType->castAs()->getPointeeType(); ExprResult Result = Visit(E->getSubExpr()); if (!Result.isUsable()) return ExprError(); E->setSubExpr(Result.get()); return E; } else if (E->getCastKind() == CK_LValueToRValue) { assert(E->isPRValue()); assert(E->getObjectKind() == OK_Ordinary); assert(isa(E->getType())); E->setType(DestType); // The sub-expression has to be a lvalue reference, so rebuild it as such. 
DestType = S.Context.getLValueReferenceType(DestType); ExprResult Result = Visit(E->getSubExpr()); if (!Result.isUsable()) return ExprError(); E->setSubExpr(Result.get()); return E; } else { llvm_unreachable("Unhandled cast type!"); } } ExprResult RebuildUnknownAnyExpr::resolveDecl(Expr *E, ValueDecl *VD) { ExprValueKind ValueKind = VK_LValue; QualType Type = DestType; // We know how to make this work for certain kinds of decls: // - functions if (FunctionDecl *FD = dyn_cast(VD)) { if (const PointerType *Ptr = Type->getAs()) { DestType = Ptr->getPointeeType(); ExprResult Result = resolveDecl(E, VD); if (Result.isInvalid()) return ExprError(); return S.ImpCastExprToType(Result.get(), Type, CK_FunctionToPointerDecay, VK_PRValue); } if (!Type->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_unknown_any_function) << VD << E->getSourceRange(); return ExprError(); } if (const FunctionProtoType *FT = Type->getAs()) { // We must match the FunctionDecl's type to the hack introduced in // RebuildUnknownAnyExpr::VisitCallExpr to vararg functions of unknown // type. See the lengthy commentary in that routine. QualType FDT = FD->getType(); const FunctionType *FnType = FDT->castAs(); const FunctionProtoType *Proto = dyn_cast_or_null(FnType); DeclRefExpr *DRE = dyn_cast(E); if (DRE && Proto && Proto->getParamTypes().empty() && Proto->isVariadic()) { SourceLocation Loc = FD->getLocation(); FunctionDecl *NewFD = FunctionDecl::Create( S.Context, FD->getDeclContext(), Loc, Loc, FD->getNameInfo().getName(), DestType, FD->getTypeSourceInfo(), SC_None, S.getCurFPFeatures().isFPConstrained(), false /*isInlineSpecified*/, FD->hasPrototype(), /*ConstexprKind*/ ConstexprSpecKind::Unspecified); if (FD->getQualifier()) NewFD->setQualifierInfo(FD->getQualifierLoc()); SmallVector Params; for (const auto &AI : FT->param_types()) { ParmVarDecl *Param = S.BuildParmVarDeclForTypedef(FD, Loc, AI); Param->setScopeInfo(0, Params.size()); Params.push_back(Param); } NewFD->setParams(Params); DRE->setDecl(NewFD); VD = DRE->getDecl(); } } if (CXXMethodDecl *MD = dyn_cast(FD)) if (MD->isInstance()) { ValueKind = VK_PRValue; Type = S.Context.BoundMemberTy; } // Function references aren't l-values in C. if (!S.getLangOpts().CPlusPlus) ValueKind = VK_PRValue; // - variables } else if (isa(VD)) { if (const ReferenceType *RefTy = Type->getAs()) { Type = RefTy->getPointeeType(); } else if (Type->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_unknown_any_var_function_type) << VD << E->getSourceRange(); return ExprError(); } // - nothing else } else { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_decl) << VD << E->getSourceRange(); return ExprError(); } // Modifying the declaration like this is friendly to IR-gen but // also really dangerous. VD->setType(DestType); E->setType(Type); E->setValueKind(ValueKind); return E; } /// Check a cast of an unknown-any type. We intentionally only /// trigger this for C-style casts. ExprResult Sema::checkUnknownAnyCast(SourceRange TypeRange, QualType CastType, Expr *CastExpr, CastKind &CastKind, ExprValueKind &VK, CXXCastPath &Path) { // The type we're casting to must be either void or complete. if (!CastType->isVoidType() && RequireCompleteType(TypeRange.getBegin(), CastType, diag::err_typecheck_cast_to_incomplete)) return ExprError(); // Rewrite the casted expression from scratch. 
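  // Illustrative aside (not part of this patch) before the rewrite below; the
  // '__unknown_anytype' machinery exists for debugger expression evaluation
  // (e.g. under the cc1 flag -funknown-anytype), roughly along these lines:
  //
  //   extern __unknown_anytype unknown_fn(...);
  //   extern __unknown_anytype unknown_var;
  //   int A = (int)unknown_fn(42);  // the C-style cast resolves the call's type
  //   int B = (int)unknown_var;     // ...and the variable's type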
ExprResult result = RebuildUnknownAnyExpr(*this, CastType).Visit(CastExpr); if (!result.isUsable()) return ExprError(); CastExpr = result.get(); VK = CastExpr->getValueKind(); CastKind = CK_NoOp; return CastExpr; } ExprResult Sema::forceUnknownAnyToType(Expr *E, QualType ToType) { return RebuildUnknownAnyExpr(*this, ToType).Visit(E); } ExprResult Sema::checkUnknownAnyArg(SourceLocation callLoc, Expr *arg, QualType ¶mType) { // If the syntactic form of the argument is not an explicit cast of // any sort, just do default argument promotion. ExplicitCastExpr *castArg = dyn_cast(arg->IgnoreParens()); if (!castArg) { ExprResult result = DefaultArgumentPromotion(arg); if (result.isInvalid()) return ExprError(); paramType = result.get()->getType(); return result; } // Otherwise, use the type that was written in the explicit cast. assert(!arg->hasPlaceholderType()); paramType = castArg->getTypeAsWritten(); // Copy-initialize a parameter of that type. InitializedEntity entity = InitializedEntity::InitializeParameter(Context, paramType, /*consumed*/ false); return PerformCopyInitialization(entity, callLoc, arg); } static ExprResult diagnoseUnknownAnyExpr(Sema &S, Expr *E) { Expr *orig = E; unsigned diagID = diag::err_uncasted_use_of_unknown_any; while (true) { E = E->IgnoreParenImpCasts(); if (CallExpr *call = dyn_cast(E)) { E = call->getCallee(); diagID = diag::err_uncasted_call_of_unknown_any; } else { break; } } SourceLocation loc; NamedDecl *d; if (DeclRefExpr *ref = dyn_cast(E)) { loc = ref->getLocation(); d = ref->getDecl(); } else if (MemberExpr *mem = dyn_cast(E)) { loc = mem->getMemberLoc(); d = mem->getMemberDecl(); } else if (ObjCMessageExpr *msg = dyn_cast(E)) { diagID = diag::err_uncasted_call_of_unknown_any; loc = msg->getSelectorStartLoc(); d = msg->getMethodDecl(); if (!d) { S.Diag(loc, diag::err_uncasted_send_to_unknown_any_method) << static_cast(msg->isClassMessage()) << msg->getSelector() << orig->getSourceRange(); return ExprError(); } } else { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr) << E->getSourceRange(); return ExprError(); } S.Diag(loc, diagID) << d << orig->getSourceRange(); // Never recoverable. return ExprError(); } /// Check for operands with placeholder types and complain if found. /// Returns ExprError() if there was an error and no recovery was possible. ExprResult Sema::CheckPlaceholderExpr(Expr *E) { if (!Context.isDependenceAllowed()) { // C cannot handle TypoExpr nodes on either side of a binop because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. ExprResult Result = CorrectDelayedTyposInExpr(E); if (!Result.isUsable()) return ExprError(); E = Result.get(); } const BuiltinType *placeholderType = E->getType()->getAsPlaceholderType(); if (!placeholderType) return E; switch (placeholderType->getKind()) { // Overloaded expressions. case BuiltinType::Overload: { // Try to resolve a single function template specialization. // This is obligatory. ExprResult Result = E; if (ResolveAndFixSingleFunctionTemplateSpecialization(Result, false)) return Result; // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization // leaves Result unchanged on failure. Result = E; if (resolveAndFixAddressOfSingleOverloadCandidate(Result)) return Result; // If that failed, try to recover with a call. tryToRecoverWithCall(Result, PDiag(diag::err_ovl_unresolvable), /*complain*/ true); return Result; } // Bound member functions. 
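  // Illustrative aside (not part of this patch): what reaches this case.
  //
  //   struct S { void f(); };
  //   void g(S X) {
  //     X.f;   // error: reference to a non-static member function must be
  //            // called (the BoundMember placeholder handling below).
  //   }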
case BuiltinType::BoundMember: { ExprResult result = E; const Expr *BME = E->IgnoreParens(); PartialDiagnostic PD = PDiag(diag::err_bound_member_function); // Try to give a nicer diagnostic if it is a bound member that we recognize. if (isa(BME)) { PD = PDiag(diag::err_dtor_expr_without_call) << /*pseudo-destructor*/ 1; } else if (const auto *ME = dyn_cast(BME)) { if (ME->getMemberNameInfo().getName().getNameKind() == DeclarationName::CXXDestructorName) PD = PDiag(diag::err_dtor_expr_without_call) << /*destructor*/ 0; } tryToRecoverWithCall(result, PD, /*complain*/ true); return result; } // ARC unbridged casts. case BuiltinType::ARCUnbridgedCast: { Expr *realCast = stripARCUnbridgedCast(E); diagnoseARCUnbridgedCast(realCast); return realCast; } // Expressions of unknown type. case BuiltinType::UnknownAny: return diagnoseUnknownAnyExpr(*this, E); // Pseudo-objects. case BuiltinType::PseudoObject: return checkPseudoObjectRValue(E); case BuiltinType::BuiltinFn: { // Accept __noop without parens by implicitly converting it to a call expr. auto *DRE = dyn_cast(E->IgnoreParenImpCasts()); if (DRE) { auto *FD = cast(DRE->getDecl()); unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__noop) { E = ImpCastExprToType(E, Context.getPointerType(FD->getType()), CK_BuiltinFnToFnPtr) .get(); return CallExpr::Create(Context, E, /*Args=*/{}, Context.IntTy, VK_PRValue, SourceLocation(), FPOptionsOverride()); } if (Context.BuiltinInfo.isInStdNamespace(BuiltinID)) { // Any use of these other than a direct call is ill-formed as of C++20, // because they are not addressable functions. In earlier language // modes, warn and force an instantiation of the real body. Diag(E->getBeginLoc(), getLangOpts().CPlusPlus20 ? diag::err_use_of_unaddressable_function : diag::warn_cxx20_compat_use_of_unaddressable_function); if (FD->isImplicitlyInstantiable()) { // Require a definition here because a normal attempt at // instantiation for a builtin will be ignored, and we won't try // again later. We assume that the definition of the template // precedes this use. InstantiateFunctionDefinition(E->getBeginLoc(), FD, /*Recursive=*/false, /*DefinitionRequired=*/true, /*AtEndOfTU=*/false); } // Produce a properly-typed reference to the function. CXXScopeSpec SS; SS.Adopt(DRE->getQualifierLoc()); TemplateArgumentListInfo TemplateArgs; DRE->copyTemplateArgumentsInto(TemplateArgs); return BuildDeclRefExpr( FD, FD->getType(), VK_LValue, DRE->getNameInfo(), DRE->hasQualifier() ? &SS : nullptr, DRE->getFoundDecl(), DRE->getTemplateKeywordLoc(), DRE->hasExplicitTemplateArgs() ? &TemplateArgs : nullptr); } } Diag(E->getBeginLoc(), diag::err_builtin_fn_use); return ExprError(); } case BuiltinType::IncompleteMatrixIdx: Diag(cast(E->IgnoreParens()) ->getRowIdx() ->getBeginLoc(), diag::err_matrix_incomplete_index); return ExprError(); // Expressions of unknown type. case BuiltinType::OMPArraySection: Diag(E->getBeginLoc(), diag::err_omp_array_section_use); return ExprError(); // Expressions of unknown type. case BuiltinType::OMPArrayShaping: return ExprError(Diag(E->getBeginLoc(), diag::err_omp_array_shaping_use)); case BuiltinType::OMPIterator: return ExprError(Diag(E->getBeginLoc(), diag::err_omp_iterator_use)); // Everything else should be impossible. 
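  // Illustrative aside (not part of this patch) on the BuiltinFn handling
  // above:
  //
  //   int A = __noop;  // MS extension: accepted by converting the bare
  //                    // '__noop' into a call expression returning int.
  //
  // Likewise, as of C++20 a non-call use of an unaddressable standard-library
  // builtin such as std::move is rejected via err_use_of_unaddressable_function,
  // while earlier language modes only emit the compatibility warning.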
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ case BuiltinType::Id: #include "clang/Basic/OpenCLExtensionTypes.def" #define SVE_TYPE(Name, Id, SingletonId) \ case BuiltinType::Id: #include "clang/Basic/AArch64SVEACLETypes.def" #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: #include "clang/Basic/PPCTypes.def" #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" #define BUILTIN_TYPE(Id, SingletonId) case BuiltinType::Id: #define PLACEHOLDER_TYPE(Id, SingletonId) #include "clang/AST/BuiltinTypes.def" break; } llvm_unreachable("invalid placeholder type!"); } bool Sema::CheckCaseExpression(Expr *E) { if (E->isTypeDependent()) return true; if (E->isValueDependent() || E->isIntegerConstantExpr(Context)) return E->getType()->isIntegralOrEnumerationType(); return false; } /// ActOnObjCBoolLiteral - Parse {__objc_yes,__objc_no} literals. ExprResult Sema::ActOnObjCBoolLiteral(SourceLocation OpLoc, tok::TokenKind Kind) { assert((Kind == tok::kw___objc_yes || Kind == tok::kw___objc_no) && "Unknown Objective-C Boolean value!"); QualType BoolT = Context.ObjCBuiltinBoolTy; if (!Context.getBOOLDecl()) { LookupResult Result(*this, &Context.Idents.get("BOOL"), OpLoc, Sema::LookupOrdinaryName); if (LookupName(Result, getCurScope()) && Result.isSingleResult()) { NamedDecl *ND = Result.getFoundDecl(); if (TypedefDecl *TD = dyn_cast(ND)) Context.setBOOLDecl(TD); } } if (Context.getBOOLDecl()) BoolT = Context.getBOOLType(); return new (Context) ObjCBoolLiteralExpr(Kind == tok::kw___objc_yes, BoolT, OpLoc); } ExprResult Sema::ActOnObjCAvailabilityCheckExpr( llvm::ArrayRef AvailSpecs, SourceLocation AtLoc, SourceLocation RParen) { auto FindSpecVersion = [&](StringRef Platform) -> Optional { auto Spec = llvm::find_if(AvailSpecs, [&](const AvailabilitySpec &Spec) { return Spec.getPlatform() == Platform; }); // Transcribe the "ios" availability check to "maccatalyst" when compiling // for "maccatalyst" if "maccatalyst" is not specified. if (Spec == AvailSpecs.end() && Platform == "maccatalyst") { Spec = llvm::find_if(AvailSpecs, [&](const AvailabilitySpec &Spec) { return Spec.getPlatform() == "ios"; }); } if (Spec == AvailSpecs.end()) return None; return Spec->getVersion(); }; VersionTuple Version; if (auto MaybeVersion = FindSpecVersion(Context.getTargetInfo().getPlatformName())) Version = *MaybeVersion; // The use of `@available` in the enclosing context should be analyzed to // warn when it's used inappropriately (i.e. not if(@available)). if (FunctionScopeInfo *Context = getCurFunctionAvailabilityContext()) Context->HasPotentialAvailabilityViolations = true; return new (Context) ObjCAvailabilityCheckExpr(Version, AtLoc, RParen, Context.BoolTy); } ExprResult Sema::CreateRecoveryExpr(SourceLocation Begin, SourceLocation End, ArrayRef SubExprs, QualType T) { if (!Context.getLangOpts().RecoveryAST) return ExprError(); if (isSFINAEContext()) return ExprError(); if (T.isNull() || T->isUndeducedType() || !Context.getLangOpts().RecoveryASTType) // We don't know the concrete type, fallback to dependent type. 
T = Context.DependentTy; return RecoveryExpr::Create(Context, T, Begin, End, SubExprs); } diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h index 26686143af95..a54f8f5d2db8 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h @@ -1,140 +1,166 @@ //===- MarkupFilter.h -------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file declares a filter that replaces symbolizer markup with /// human-readable expressions. /// //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H #include "Markup.h" #include #include "llvm/ADT/DenseMap.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" namespace llvm { namespace symbolize { +class LLVMSymbolizer; + /// Filter to convert parsed log symbolizer markup elements into human-readable /// text. class MarkupFilter { public: - MarkupFilter(raw_ostream &OS, Optional ColorsEnabled = llvm::None); + MarkupFilter(raw_ostream &OS, LLVMSymbolizer &Symbolizer, + Optional ColorsEnabled = llvm::None); /// Filters a line containing symbolizer markup and writes the human-readable /// results to the output stream. /// /// Invalid or unimplemented markup elements are removed. Some output may be /// deferred until future filter() or finish() call. void filter(StringRef Line); /// Records that the input stream has ended and writes any deferred output. void finish(); private: struct Module { uint64_t ID; std::string Name; SmallVector BuildID; }; struct MMap { uint64_t Addr; uint64_t Size; const Module *Mod; std::string Mode; // Lowercase uint64_t ModuleRelativeAddr; bool contains(uint64_t Addr) const; + uint64_t getModuleRelativeAddr(uint64_t Addr) const; }; // An informational module line currently being constructed. As many mmap // elements as possible are folded into one ModuleInfo line. struct ModuleInfoLine { const Module *Mod; SmallVector MMaps = {}; }; + // The semantics of a possible program counter value. + enum class PCType { + // The address is a return address and must be adjusted to point to the call + // itself. + ReturnAddress, + // The address is the precise location in the code and needs no adjustment. 
+ PreciseCode, + }; + bool tryContextualElement(const MarkupNode &Node, const SmallVector &DeferredNodes); bool tryMMap(const MarkupNode &Element, const SmallVector &DeferredNodes); bool tryReset(const MarkupNode &Element, const SmallVector &DeferredNodes); bool tryModule(const MarkupNode &Element, const SmallVector &DeferredNodes); void beginModuleInfoLine(const Module *M); void endAnyModuleInfoLine(); void filterNode(const MarkupNode &Node); bool tryPresentation(const MarkupNode &Node); bool trySymbol(const MarkupNode &Node); + bool tryPC(const MarkupNode &Node); + bool tryBackTrace(const MarkupNode &Node); + bool tryData(const MarkupNode &Node); bool trySGR(const MarkupNode &Node); void highlight(); void highlightValue(); void restoreColor(); void resetColor(); + void printRawElement(const MarkupNode &Element); + void printValue(Twine Value); + Optional parseModule(const MarkupNode &Element) const; Optional parseMMap(const MarkupNode &Element) const; Optional parseAddr(StringRef Str) const; Optional parseModuleID(StringRef Str) const; Optional parseSize(StringRef Str) const; Optional> parseBuildID(StringRef Str) const; Optional parseMode(StringRef Str) const; + Optional parsePCType(StringRef Str) const; + Optional parseFrameNumber(StringRef Str) const; bool checkTag(const MarkupNode &Node) const; bool checkNumFields(const MarkupNode &Element, size_t Size) const; bool checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const; + bool checkNumFieldsAtMost(const MarkupNode &Element, size_t Size) const; void reportTypeError(StringRef Str, StringRef TypeName) const; void reportLocation(StringRef::iterator Loc) const; - const MMap *overlappingMMap(const MMap &Map) const; + const MMap *getOverlappingMMap(const MMap &Map) const; + const MMap *getContainingMMap(uint64_t Addr) const; + + uint64_t adjustAddr(uint64_t Addr, PCType Type) const; StringRef lineEnding() const; raw_ostream &OS; + LLVMSymbolizer &Symbolizer; const bool ColorsEnabled; MarkupParser Parser; // Current line being filtered. StringRef Line; // A module info line currently being built. This incorporates as much mmap // information as possible before being emitted. Optional MIL; // SGR state. Optional Color; bool Bold = false; // Map from Module ID to Module. DenseMap> Modules; // Ordered map from starting address to mmap. std::map MMaps; }; } // end namespace symbolize } // end namespace llvm #endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 42a141e8876b..a7f9382478d4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1,25016 +1,25015 @@ //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run // both before and after the DAG is legalized. // // This pass is not a substitute for the LLVM IR instcombine pass. This pass is // primarily intended to handle simplification opportunities that are implicit // in the LLVM IR and exposed by the various codegen lowering phases. 
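// Illustrative aside (not part of this patch): typical folds performed here
// include, for example,
//   (add x, 0)      -> x
//   (and x, -1)     -> x
//   (zext (load p)) -> (zextload p)   when the extending load is legal/profitable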
// //===----------------------------------------------------------------------===// #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "dagcombine" STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops"); static cl::opt CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis")); static cl::opt UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA")); #ifndef NDEBUG static cl::opt CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function")); #endif /// Hidden option to stress test load slicing, i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. 
static cl::opt StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false)); static cl::opt MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); static cl::opt EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store")); static cl::opt TokenFactorInlineLimit( "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors")); static cl::opt StoreMergeDependenceLimit( "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check")); static cl::opt EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); static cl::opt EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load//store with " "a narrower store")); namespace { class DAGCombiner { SelectionDAG &DAG; const TargetLowering &TLI; const SelectionDAGTargetInfo *STI; CombineLevel Level = BeforeLegalizeTypes; CodeGenOpt::Level OptLevel; bool LegalDAG = false; bool LegalOperations = false; bool LegalTypes = false; bool ForCodeSize; bool DisableGenericCombines; /// Worklist of all of the nodes that need to be simplified. /// /// This must behave as a stack -- new nodes to process are pushed onto the /// back and when processing we pop off of the back. /// /// The worklist will not contain duplicates but may contain null entries /// due to nodes being deleted from the underlying DAG. SmallVector Worklist; /// Mapping from an SDNode to its position on the worklist. /// /// This is used to find and remove nodes from the worklist (by nulling /// them) when they are deleted from the underlying DAG. It relies on /// stable indices of nodes within the worklist. DenseMap WorklistMap; /// This records all nodes attempted to add to the worklist since we /// considered a new worklist entry. As we keep do not add duplicate nodes /// in the worklist, this is different from the tail of the worklist. SmallSetVector PruningList; /// Set of nodes which have been combined (at least once). /// /// This is used to allow us to reliably add any operands of a DAG node /// which have not yet been combined to the worklist. SmallPtrSet CombinedNodes; /// Map from candidate StoreNode to the pair of RootNode and count. /// The count is used to track how many times we have seen the StoreNode /// with the same RootNode bail out in dependence check. If we have seen /// the bail out for the same pair many times over a limit, we won't /// consider the StoreNode with the same RootNode as store merging /// candidate again. DenseMap> StoreRootCountMap; // AA - Used for DAG load/store alias analysis. AliasAnalysis *AA; /// When an instruction is simplified, add all users of the instruction to /// the work lists because they might get more simplified now. void AddUsersToWorklist(SDNode *N) { for (SDNode *Node : N->uses()) AddToWorklist(Node); } /// Convenient shorthand to add a node and all of its user to the worklist. 
void AddToWorklistWithUsers(SDNode *N) { AddUsersToWorklist(N); AddToWorklist(N); } // Prune potentially dangling nodes. This is called after // any visit to a node, but should also be called during a visit after any // failed combine which may have created a DAG node. void clearAddedDanglingWorklistEntries() { // Check any nodes added to the worklist to see if they are prunable. while (!PruningList.empty()) { auto *N = PruningList.pop_back_val(); if (N->use_empty()) recursivelyDeleteUnusedNodes(N); } } SDNode *getNextWorklistEntry() { // Before we do any work, remove nodes that are not in use. clearAddedDanglingWorklistEntries(); SDNode *N = nullptr; // The Worklist holds the SDNodes in order, but it may contain null // entries. while (!N && !Worklist.empty()) { N = Worklist.pop_back_val(); } if (N) { bool GoodWorklistEntry = WorklistMap.erase(N); (void)GoodWorklistEntry; assert(GoodWorklistEntry && "Found a worklist entry without a corresponding map entry!"); } return N; } /// Call the node-specific routine that folds each particular type of node. SDValue visit(SDNode *N); public: DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) { ForCodeSize = DAG.shouldOptForSize(); DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel); MaximumLegalStoreInBits = 0; // We use the minimum store size here, since that's all we can guarantee // for the scalable vector types. for (MVT VT : MVT::all_valuetypes()) if (EVT(VT).isSimple() && VT != MVT::Other && TLI.isTypeLegal(EVT(VT)) && VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits) MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize(); } void ConsiderForPruning(SDNode *N) { // Mark this for potential pruning. PruningList.insert(N); } /// Add to the worklist making sure its instance is at the back (next to be /// processed.) void AddToWorklist(SDNode *N) { assert(N->getOpcode() != ISD::DELETED_NODE && "Deleted Node added to Worklist"); // Skip handle nodes as they can't usefully be combined and confuse the // zero-use deletion strategy. if (N->getOpcode() == ISD::HANDLENODE) return; ConsiderForPruning(N); if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) Worklist.push_back(N); } /// Remove all instances of N from the worklist. void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); PruningList.remove(N); StoreRootCountMap.erase(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) return; // Not in the worklist. // Null out the entry rather than erasing it to avoid a linear operation. Worklist[It->second] = nullptr; WorklistMap.erase(It); } void deleteAndRecombine(SDNode *N); bool recursivelyDeleteUnusedNodes(SDNode *N); /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { return CombineTo(N, &Res, 1, AddTo); } /// Replaces all uses of the results of one DAG node with new values. 
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true) { SDValue To[] = { Res0, Res1 }; return CombineTo(N, To, 2, AddTo); } void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); private: unsigned MaximumLegalStoreInBits; /// Check the specified integer node value to see if it can be simplified or /// if things it uses can be simplified by bit propagation. /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt DemandedBits = APInt::getAllOnes(BitWidth); return SimplifyDemandedBits(Op, DemandedBits); } bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); KnownBits Known; if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false)) return false; // Revisit the node. AddToWorklist(Op.getNode()); CommitTargetLoweringOpt(TLO); return true; } /// Check the specified vector node value to see if it can be simplified or /// if things it uses can be simplified as it only uses some of the /// elements. If so, return true. bool SimplifyDemandedVectorElts(SDValue Op) { // TODO: For now just pretend it cannot be simplified. if (Op.getValueType().isScalableVector()) return false; unsigned NumElts = Op.getValueType().getVectorNumElements(); APInt DemandedElts = APInt::getAllOnes(NumElts); return SimplifyDemandedVectorElts(Op, DemandedElts); } bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, bool AssumeSingleUse = false); bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, bool AssumeSingleUse = false); bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); // Scalars have size 0 to distinguish from singleton vectors. SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD); bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val); bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val); /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed /// load. /// /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. /// \param InVecVT type of the input vector to EVE with bitcasts resolved. /// \param EltNo index of the vector element to load. /// \param OriginalLoad load that EVE came from to be replaced. /// \returns EVE on success SDValue() on failure. SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); SDValue PromoteIntBinOp(SDValue Op); SDValue PromoteIntShiftOp(SDValue Op); SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. SDValue combine(SDNode *N); // Visitation implementation - Implement dag node combining for different // node types. The semantics are as follows: // Return Value: // SDValue.getNode() == 0 - No change was made // SDValue.getNode() == N - N was replaced, is dead and has been handled. // otherwise - N should be replaced by the returned Operand. 
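  // For example (illustrative): returning SDValue() leaves N alone, returning
  // SDValue(N, 0) tells the caller that CombineTo() has already replaced N's
  // uses, and returning any other value X asks the caller to replace N with X.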
// SDValue visitTokenFactor(SDNode *N); SDValue visitMERGE_VALUES(SDNode *N); SDValue visitADD(SDNode *N); SDValue visitADDLike(SDNode *N); SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitSUB(SDNode *N); SDValue visitADDSAT(SDNode *N); SDValue visitSUBSAT(SDNode *N); SDValue visitADDC(SDNode *N); SDValue visitADDO(SDNode *N); SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitSUBC(SDNode *N); SDValue visitSUBO(SDNode *N); SDValue visitADDE(SDNode *N); SDValue visitADDCARRY(SDNode *N); SDValue visitSADDO_CARRY(SDNode *N); SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitSUBCARRY(SDNode *N); SDValue visitSSUBO_CARRY(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitMULFIX(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitUDIV(SDNode *N); SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitREM(SDNode *N); SDValue visitMULHU(SDNode *N); SDValue visitMULHS(SDNode *N); SDValue visitAVG(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); SDValue visitMULO(SDNode *N); SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitOR(SDNode *N); SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitFunnelShift(SDNode *N); SDValue visitSHLSAT(SDNode *N); SDValue visitRotate(SDNode *N); SDValue visitABS(SDNode *N); SDValue visitBSWAP(SDNode *N); SDValue visitBITREVERSE(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSETCCCARRY(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitAssertExt(SDNode *N); SDValue visitAssertAlign(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitEXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitFREEZE(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); SDValue visitSTRICT_FADD(SDNode *N); SDValue visitFSUB(SDNode *N); SDValue visitFMUL(SDNode *N); SDValue visitFMA(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); SDValue visitFPOW(SDNode *N); SDValue visitSINT_TO_FP(SDNode *N); SDValue visitUINT_TO_FP(SDNode *N); SDValue visitFP_TO_SINT(SDNode *N); SDValue visitFP_TO_UINT(SDNode *N); SDValue visitFP_ROUND(SDNode *N); SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); SDValue visitFCEIL(SDNode *N); SDValue visitFTRUNC(SDNode *N); SDValue visitFFLOOR(SDNode *N); SDValue visitFMinMax(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); SDValue replaceStoreOfFPConstant(StoreSDNode *ST); SDValue visitSTORE(SDNode *N); SDValue 
visitLIFETIME_END(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); SDValue visitMGATHER(SDNode *N); SDValue visitMSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFP_TO_BF16(SDNode *N); SDValue visitVECREDUCE(SDNode *N); SDValue visitVPOp(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); bool reassociationCanBreakAddressingModePattern(unsigned Opc, const SDLoc &DL, SDNode *N, SDValue N0, SDValue N1); SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1); SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); SDValue visitShiftByConstant(SDNode *N); SDValue foldSelectOfConstants(SDNode *N); SDValue foldVSelectOfConstants(SDNode *N); SDValue foldBinOpIntoSelect(SDNode *BO); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N); SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); SDValue convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC); SDValue foldSignChangeInBitcast(SDNode *N); SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC); SDValue foldSelectOfBinops(SDNode *N); SDValue foldSextSetcc(SDNode *N); SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL); SDValue foldSubToUSubSat(EVT DstVT, SDNode *N); SDValue unfoldMaskedMerge(SDNode *N); SDValue unfoldExtremeBitClearingToShifts(SDNode *N); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans); SDValue rebuildSetCC(SDValue N); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC, bool MatchStrict = false) const; bool isOneUseSetCC(SDValue N) const; SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); SDValue CombineZExtLogicopShiftLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildSREMPow2(SDNode *N); SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N); SDValue BuildLogBase2(SDValue V, const SDLoc &DL); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue 
Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); SDValue mergeTruncStores(StoreSDNode *N); SDValue reduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); SDValue convertBuildVecZextToZext(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecTruncToBitCast(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx, bool DidSplitVec); SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast); /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl &Aliases); /// Return true if there is any possibility that the two addresses overlap. bool mayAlias(SDNode *Op0, SDNode *Op1) const; /// Walk up chain skipping non-aliasing memory nodes, looking for a better /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); /// Try to replace a store and any possibly adjacent stores on /// consecutive chains with better chains. Return true only if St is /// replaced. /// /// Notice that other chains may still be replaced even if the function /// returns false. bool findBetterNeighborChains(StoreSDNode *St); // Helper for findBetterNeighborChains. Walk up store chain add additional // chained stores that do not overlap and can be parallelized. bool parallelizeChainedStores(StoreSDNode *St); /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { // Ptr to the mem node. LSBaseSDNode *MemNode; // Offset from the base ptr. int64_t OffsetFromBase; MemOpLink(LSBaseSDNode *N, int64_t Offset) : MemNode(N), OffsetFromBase(Offset) {} }; // Classify the origin of a stored value. enum class StoreSource { Unknown, Constant, Extract, Load }; StoreSource getStoreSource(SDValue StoreVal) { switch (StoreVal.getOpcode()) { case ISD::Constant: case ISD::ConstantFP: return StoreSource::Constant; case ISD::EXTRACT_VECTOR_ELT: case ISD::EXTRACT_SUBVECTOR: return StoreSource::Extract; case ISD::LOAD: return StoreSource::Load; default: return StoreSource::Unknown; } } /// This is a helper function for visitMUL to check the profitability /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). /// MulNode is the original multiply, AddNode is (add x, c1), /// and ConstNode is c2. bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, SDValue ConstNode); /// This is a helper function for visitAND and visitZERO_EXTEND. Returns /// true if the (and (load x) c) pattern matches an extload. ExtVT returns /// the type of the loaded value to be extended. 
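  /// For instance (sketch), (and (load i32 p), 255) can be matched as a
  /// zero-extending i8 load when the target supports that extending load, in
  /// which case ExtVT is set to i8.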
bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, EVT LoadResultTy, EVT &ExtVT); /// Helper function to calculate whether the given Load/Store can have its /// width reduced to ExtVT. bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType, EVT &MemVT, unsigned ShAmt = 0); /// Used by BackwardsPropagateMask to find suitable loads. bool SearchForAndLoads(SDNode *N, SmallVectorImpl &Loads, SmallPtrSetImpl &NodesWithConsts, ConstantSDNode *Mask, SDNode *&NodeToMask); /// Attempt to propagate a given AND node back to load leaves so that they /// can be combined into narrow loads. bool BackwardsPropagateMask(SDNode *N); /// Helper function for mergeConsecutiveStores which merges the component /// store chains. SDValue getMergeStoreChains(SmallVectorImpl &StoreNodes, unsigned NumStores); /// This is a helper function for mergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store introducing /// bitcasts if necessary. \return True if a merged store was created. bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc); /// This is a helper function for mergeConsecutiveStores. Stores that /// potentially may be merged with St are placed in StoreNodes. RootNode is /// a chain predecessor to all store candidates. void getStoreMergeCandidates(StoreSDNode *St, SmallVectorImpl &StoreNodes, SDNode *&Root); /// Helper function for mergeConsecutiveStores. Checks if candidate stores /// have indirect dependency through their operands. RootNode is the /// predecessor to all stores calculated by getStoreMergeCandidates and is /// used to prune the dependency check. \return True if safe to merge. bool checkMergeStoreCandidatesForDependencies( SmallVectorImpl &StoreNodes, unsigned NumStores, SDNode *RootNode); /// This is a helper function for mergeConsecutiveStores. Given a list of /// store candidates, find the first N that are consecutive in memory. /// Returns 0 if there are not at least 2 consecutive stores to try merging. unsigned getConsecutiveStores(SmallVectorImpl &StoreNodes, int64_t ElementSizeBytes) const; /// This is a helper function for mergeConsecutiveStores. It is used for /// store chains that are composed entirely of constant values. bool tryStoreMergeOfConstants(SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *Root, bool AllowVectors); /// This is a helper function for mergeConsecutiveStores. It is used for /// store chains that are composed entirely of extracted vector elements. /// When extracting multiple vector elements, try to store them in one /// vector store rather than a sequence of scalar stores. bool tryStoreMergeOfExtracts(SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *Root); /// This is a helper function for mergeConsecutiveStores. It is used for /// store chains that are composed entirely of loaded values. bool tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *Root, bool AllowVectors, bool IsNonTemporalStore, bool IsNonTemporalLoad); /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return true if stores were merged. 
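  /// Illustrative example: four adjacent i8 stores of constant bytes may be
  /// rewritten as a single i32 store of the combined constant, subject to the
  /// legality, alignment and dependence checks performed by the helpers above.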
bool mergeConsecutiveStores(StoreSDNode *St); /// Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) /// /// \p N needs to be a truncation and its first operand an AND. Other /// requirements are checked by the function (e.g. that trunc is /// single-use) and if missed an empty SDValue is returned. SDValue distributeTruncateThroughAnd(SDNode *N); /// Helper function to determine whether the target supports operation /// given by \p Opcode for type \p VT, that is, whether the operation /// is legal or custom before legalizing operations, and whether is /// legal (but not custom) after legalization. bool hasOperation(unsigned Opcode, EVT VT) { return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations); } public: /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); SelectionDAG &getDAG() const { return DAG; } /// Returns a type large enough to hold any valid shift amount - before type /// legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes); } /// This method returns true if we are running before type legalization or /// if the specified VT is legal. bool isTypeLegal(const EVT &VT) { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } void ExtendSetCCUses(const SmallVectorImpl &SetCCs, SDValue OrigLoad, SDValue ExtLoad, ISD::NodeType ExtType); }; /// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: explicit WorklistRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} void NodeDeleted(SDNode *N, SDNode *E) override { DC.removeFromWorklist(N); } }; class WorklistInserter : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: explicit WorklistInserter(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} // FIXME: Ideally we could add N to the worklist, but this causes exponential // compile time costs in large DAGs, e.g. Halide. 
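  // Instead, a newly created node is only registered for pruning here; it is
  // revisited only if a later combine adds it (or one of its users) to the
  // worklist.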
void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); } }; } // end anonymous namespace //===----------------------------------------------------------------------===// // TargetLowering::DAGCombinerInfo implementation //===----------------------------------------------------------------------===// void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { ((DAGCombiner*)DC)->AddToWorklist(N); } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, ArrayRef To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); } bool TargetLowering::DAGCombinerInfo:: recursivelyDeleteUnusedNodes(SDNode *N) { return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N); } void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); } //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// void DAGCombiner::deleteAndRecombine(SDNode *N) { removeFromWorklist(N); // If the operands of this node are only used by the node, they will now be // dead. Make sure to re-visit them and recursively delete dead nodes. for (const SDValue &Op : N->ops()) // For an operand generating multiple values, one of the values may // become dead allowing further simplification (e.g. split index // arithmetic from an indexed load). if (Op->hasOneUse() || Op->getNumValues() > 1) AddToWorklist(Op.getNode()); DAG.DeleteNode(N); } // APInts must be the same size for most operations, this helper // function zero extends the shorter of the pair so that they match. // We provide an Offset so that we can create bitwidths that won't overflow. static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); LHS = LHS.zext(Bits); RHS = RHS.zext(Bits); } // Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to // the appropriate nodes based on the type of node we are checking. This // simplifies life a bit for the callers. bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC, bool MatchStrict) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(2); return true; } if (MatchStrict && (N.getOpcode() == ISD::STRICT_FSETCC || N.getOpcode() == ISD::STRICT_FSETCCS)) { LHS = N.getOperand(1); RHS = N.getOperand(2); CC = N.getOperand(3); return true; } if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) || !TLI.isConstFalseVal(N.getOperand(3))) return false; if (TLI.getBooleanContents(N.getValueType()) == TargetLowering::UndefinedBooleanContent) return false; LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(4); return true; } /// Return true if this is a SetCC-equivalent operation with only one use. 
/// If this is true, it allows the users to invert the operation for free when /// it is profitable to do so. bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse()) return true; return false; } static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) { if (!ScalarTy.isSimple()) return false; uint64_t MaskForTy = 0ULL; switch (ScalarTy.getSimpleVT().SimpleTy) { case MVT::i8: MaskForTy = 0xFFULL; break; case MVT::i16: MaskForTy = 0xFFFFULL; break; case MVT::i32: MaskForTy = 0xFFFFFFFFULL; break; default: return false; break; } APInt Val; if (ISD::isConstantSplatVector(N, Val)) return Val.getLimitedValue() == MaskForTy; return false; } // Determines if it is a constant integer or a splat/build vector of constant // integers (and undefs). // Do not permit build vector implicit truncation. static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { if (ConstantSDNode *Const = dyn_cast(N)) return !(Const->isOpaque() && NoOpaques); if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR) return false; unsigned BitWidth = N.getScalarValueSizeInBits(); for (const SDValue &Op : N->op_values()) { if (Op.isUndef()) continue; ConstantSDNode *Const = dyn_cast(Op); if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || (Const->isOpaque() && NoOpaques)) return false; } return true; } // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with // undef's. static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { if (V.getOpcode() != ISD::BUILD_VECTOR) return false; return isConstantOrConstantVector(V, NoOpaques) || ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); } // Determine if this an indexed load with an opaque target constant index. static bool canSplitIdx(LoadSDNode *LD) { return MaySplitLoadIndex && (LD->getOperand(2).getOpcode() != ISD::TargetConstant || !cast(LD->getOperand(2))->isOpaque()); } bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, const SDLoc &DL, SDNode *N, SDValue N0, SDValue N1) { // Currently this only tries to ensure we don't undo the GEP splits done by // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this, // we check if the following transformation would be problematic: // (load/store (add, (add, x, offset1), offset2)) -> // (load/store (add, x, offset1+offset2)). // (load/store (add, (add, x, y), offset2)) -> // (load/store (add, (add, x, offset2), y)). if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) return false; auto *C2 = dyn_cast(N1); if (!C2) return false; const APInt &C2APIntVal = C2->getAPIntValue(); if (C2APIntVal.getSignificantBits() > 64) return false; if (auto *C1 = dyn_cast(N0.getOperand(1))) { if (N0.hasOneUse()) return false; const APInt &C1APIntVal = C1->getAPIntValue(); const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; if (CombinedValueIntVal.getSignificantBits() > 64) return false; const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); for (SDNode *Node : N->uses()) { if (auto *LoadStore = dyn_cast(Node)) { // Is x[offset2] already not a legal addressing mode? If so then // reassociating the constants breaks nothing (we test offset2 because // that's the one we hope to fold into the load or store). 
TargetLoweringBase::AddrMode AM; AM.HasBaseReg = true; AM.BaseOffs = C2APIntVal.getSExtValue(); EVT VT = LoadStore->getMemoryVT(); unsigned AS = LoadStore->getAddressSpace(); Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) continue; // Would x[offset1+offset2] still be a legal addressing mode? AM.BaseOffs = CombinedValue; if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) return true; } } } else { if (auto *GA = dyn_cast(N0.getOperand(1))) if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA)) return false; for (SDNode *Node : N->uses()) { auto *LoadStore = dyn_cast(Node); if (!LoadStore) return false; // Is x[offset2] a legal addressing mode? If so then // reassociating the constants breaks address pattern TargetLoweringBase::AddrMode AM; AM.HasBaseReg = true; AM.BaseOffs = C2APIntVal.getSExtValue(); EVT VT = LoadStore->getMemoryVT(); unsigned AS = LoadStore->getAddressSpace(); Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) return false; } return true; } return false; } // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc. SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() != Opc) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) return DAG.getNode(Opc, DL, VT, N00, OpNode); return SDValue(); } if (TLI.isReassocProfitable(DAG, N0, N1)) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1); return DAG.getNode(Opc, DL, VT, OpNode, N01); } } // Check for repeated operand logic simplifications. if (Opc == ISD::AND || Opc == ISD::OR) { // (N00 & N01) & N00 --> N00 & N01 // (N00 & N01) & N01 --> N00 & N01 // (N00 | N01) | N00 --> N00 | N01 // (N00 | N01) | N01 --> N00 | N01 if (N1 == N00 || N1 == N01) return N0; } if (Opc == ISD::XOR) { // (N00 ^ N01) ^ N00 --> N01 if (N1 == N00) return N01; // (N00 ^ N01) ^ N01 --> N00 if (N1 == N01) return N00; } if (TLI.isReassocProfitable(DAG, N0, N1)) { if (N1 != N01) { // Reassociate if (op N00, N1) already exist if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) { // if Op (Op N00, N1), N01 already exist // we need to stop reassciate to avoid dead loop if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01})) return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01); } } if (N1 != N00) { // Reassociate if (op N01, N1) already exist if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) { // if Op (Op N01, N1), N00 already exist // we need to stop reassciate to avoid dead loop if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00})) return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00); } } } return SDValue(); } // Try to reassociate commutative binops. 
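// For example (sketch): (add (add x, c1), c2) is rebuilt as (add x, c1+c2),
// and (and (and x, y), x) collapses to (and x, y), via the commutative helper
// above applied to both operand orders.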
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative."); // Floating-point reassociation is not allowed without loose FP math. if (N0.getValueType().isFloatingPoint() || N1.getValueType().isFloatingPoint()) if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros()) return SDValue(); if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1)) return Combined; if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0)) return Combined; return SDValue(); } SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: "; To[0].dump(&DAG); dbgs() << " and " << NumTo - 1 << " other values\n"); for (unsigned i = 0, e = NumTo; i != e; ++i) assert((!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To); if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { if (To[i].getNode()) AddToWorklistWithUsers(To[i].getNode()); } } // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. if (N->use_empty()) deleteAndRecombine(N); return SDValue(N, 0); } void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace the old value with the new one. ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG); dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n'); // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. AddToWorklistWithUsers(TLO.New.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. if (TLO.Old->use_empty()) deleteAndRecombine(TLO.Old.getNode()); } /// Check the specified integer node value to see if it can be simplified or if /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, bool AssumeSingleUse) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); KnownBits Known; if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0, AssumeSingleUse)) return false; // Revisit the node. AddToWorklist(Op.getNode()); CommitTargetLoweringOpt(TLO); return true; } /// Check the specified vector node value to see if it can be simplified or /// if things it uses can be simplified as it only uses some of the elements. /// If so, return true. bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, bool AssumeSingleUse) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownUndef, KnownZero; if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO, 0, AssumeSingleUse)) return false; // Revisit the node. 
AddToWorklist(Op.getNode()); CommitTargetLoweringOpt(TLO); return true; } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDLoc DL(Load); EVT VT = Load->getValueType(0); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; Trunc.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); deleteAndRecombine(Load); AddToWorklist(Trunc.getNode()); } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; SDLoc DL(Op); if (ISD::isUNINDEXEDLoad(Op.getNode())) { LoadSDNode *LD = cast(Op); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD : LD->getExtensionType(); Replace = true; return DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); } unsigned Opc = Op.getOpcode(); switch (Opc) { default: break; case ISD::AssertSext: if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT)) return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1)); break; case ISD::AssertZext: if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT)) return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1)); break; case ISD::Constant: { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(ExtOpc, DL, PVT, Op); } } if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) return SDValue(); return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op); } SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) return SDValue(); AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp, DAG.getValueType(OldVT)); } SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) return SDValue(); AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); return DAG.getZeroExtendInReg(NewOp, DL, OldVT); } /// Promote the specified integer binary operation if the target indicates it is /// beneficial. e.g. On x86, it's usually better to promote i16 operations to /// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (!LegalOperations) return SDValue(); EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return SDValue(); // If operation type is 'undesirable', e.g. i16 on x86, consider // promoting it. unsigned Opc = Op.getOpcode(); if (TLI.isTypeDesirableForOp(Opc, VT)) return SDValue(); EVT PVT = VT; // Consult target whether it is a good idea to promote this operation and // what's the right type to promote it to. 
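  // Illustrative example: on a target where i16 is undesirable (e.g. x86),
  // (add i16 a, b) becomes trunc-to-i16 of (add i32 (ext a), (ext b)), with
  // the extension kind chosen per operand by PromoteOperand().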
if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); bool Replace0 = false; SDValue N0 = Op.getOperand(0); SDValue NN0 = PromoteOperand(N0, PVT, Replace0); bool Replace1 = false; SDValue N1 = Op.getOperand(1); SDValue NN1 = PromoteOperand(N1, PVT, Replace1); SDLoc DL(Op); SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1)); // We are always replacing N0/N1's use in N and only need additional // replacements if there are additional uses. // Note: We are checking uses of the *nodes* (SDNode) rather than values // (SDValue) here because the node may reference multiple values // (for example, the chain value of a load node). Replace0 &= !N0->hasOneUse(); Replace1 &= (N0 != N1) && !N1->hasOneUse(); // Combine Op here so it is preserved past replacements. CombineTo(Op.getNode(), RV); // If operands have a use ordering, make sure we deal with // predecessor first. if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) { std::swap(N0, N1); std::swap(NN0, NN1); } if (Replace0) { AddToWorklist(NN0.getNode()); ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); } if (Replace1) { AddToWorklist(NN1.getNode()); ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); } return Op; } return SDValue(); } /// Promote the specified integer shift operation if the target indicates it is /// beneficial. e.g. On x86, it's usually better to promote i16 operations to /// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (!LegalOperations) return SDValue(); EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return SDValue(); // If operation type is 'undesirable', e.g. i16 on x86, consider // promoting it. unsigned Opc = Op.getOpcode(); if (TLI.isTypeDesirableForOp(Opc, VT)) return SDValue(); EVT PVT = VT; // Consult target whether it is a good idea to promote this operation and // what's the right type to promote it to. if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); bool Replace = false; SDValue N0 = Op.getOperand(0); if (Opc == ISD::SRA) N0 = SExtPromoteOperand(N0, PVT); else if (Opc == ISD::SRL) N0 = ZExtPromoteOperand(N0, PVT); else N0 = PromoteOperand(N0, PVT, Replace); if (!N0.getNode()) return SDValue(); SDLoc DL(Op); SDValue N1 = Op.getOperand(1); SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); // Deal with Op being deleted. if (Op && Op.getOpcode() != ISD::DELETED_NODE) return RV; } return SDValue(); } SDValue DAGCombiner::PromoteExtend(SDValue Op) { if (!LegalOperations) return SDValue(); EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return SDValue(); // If operation type is 'undesirable', e.g. i16 on x86, consider // promoting it. unsigned Opc = Op.getOpcode(); if (TLI.isTypeDesirableForOp(Opc, VT)) return SDValue(); EVT PVT = VT; // Consult target whether it is a good idea to promote this operation and // what's the right type to promote it to. 
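  // As above, nothing is rewritten unless the target both marks the current
  // type as undesirable for this opcode and supplies a different type to
  // promote to via IsDesirableToPromoteOp().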
if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); } bool DAGCombiner::PromoteLoad(SDValue Op) { if (!LegalOperations) return false; if (!ISD::isUNINDEXEDLoad(Op.getNode())) return false; EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return false; // If operation type is 'undesirable', e.g. i16 on x86, consider // promoting it. unsigned Opc = Op.getOpcode(); if (TLI.isTypeDesirableForOp(Opc, VT)) return false; EVT PVT = VT; // Consult target whether it is a good idea to promote this operation and // what's the right type to promote it to. if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); SDLoc DL(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; Result.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); deleteAndRecombine(N); AddToWorklist(Result.getNode()); return true; } return false; } /// Recursively delete a node which has no uses and any operands for /// which it is the only use. /// /// Note that this both deletes the nodes and removes them from the worklist. /// It also adds any nodes who have had a user deleted to the worklist as they /// may now have only one use and subject to other combines. bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { if (!N->use_empty()) return false; SmallSetVector Nodes; Nodes.insert(N); do { N = Nodes.pop_back_val(); if (!N) continue; if (N->use_empty()) { for (const SDValue &ChildN : N->op_values()) Nodes.insert(ChildN.getNode()); removeFromWorklist(N); DAG.DeleteNode(N); } else { AddToWorklist(N); } } while (!Nodes.empty()); return true; } //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; LegalDAG = Level >= AfterLegalizeDAG; LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; WorklistInserter AddNodes(*this); // Add all the dag nodes to the worklist. for (SDNode &Node : DAG.allnodes()) AddToWorklist(&Node); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any // changes of the root. HandleSDNode Dummy(DAG.getRoot()); // While we have a valid worklist entry node, try to combine it. while (SDNode *N = getNextWorklistEntry()) { // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a // reduced number of uses, allowing other xforms. 
if (recursivelyDeleteUnusedNodes(N)) continue; WorklistRemover DeadNodes(*this); // If this combine is running after legalizing the DAG, re-legalize any // nodes pulled off the worklist. if (LegalDAG) { SmallSetVector UpdatedNodes; bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); for (SDNode *LN : UpdatedNodes) AddToWorklistWithUsers(LN); if (!NIsValid) continue; } LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); // Add any operands of the new node which have not yet been combined to the // worklist as well. Because the worklist uniques things already, this // won't repeatedly process the same operand. CombinedNodes.insert(N); for (const SDValue &ChildN : N->op_values()) if (!CombinedNodes.count(ChildN.getNode())) AddToWorklist(ChildN.getNode()); SDValue RV = combine(N); if (!RV.getNode()) continue; ++NodesCombined; // If we get back the same node we passed in, rather than a new node or // zero, we know that the node must have defined multiple values and // CombineTo was used. Since CombineTo takes care of the worklist // mechanics for us, we have no work to do in this case. if (RV.getNode() == N) continue; assert(N->getOpcode() != ISD::DELETED_NODE && RV.getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG)); if (N->getNumValues() == RV->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { assert(N->getValueType(0) == RV.getValueType() && N->getNumValues() == 1 && "Type mismatch"); DAG.ReplaceAllUsesWith(N, &RV); } // Push the new node and any users onto the worklist. Omit this if the // new node is the EntryToken (e.g. if a store managed to get optimized // out), because re-visiting the EntryToken and its users will not uncover // any additional opportunities, but there may be a large number of such // users, potentially causing compile time explosion. if (RV.getOpcode() != ISD::EntryToken) { AddToWorklist(RV.getNode()); AddUsersToWorklist(RV.getNode()); } // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. This will also take care of adding any // operands which have lost a user to the worklist. recursivelyDeleteUnusedNodes(N); } // If the root changed (e.g. it was a dead load, update the root). 
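  // The HandleSDNode created at the top of Run() still tracks the (possibly
  // replaced) root, so reading it back re-installs whatever the original root
  // was combined into before dead nodes are removed.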
DAG.setRoot(Dummy.getValue()); DAG.RemoveDeadNodes(); } SDValue DAGCombiner::visit(SDNode *N) { switch (N->getOpcode()) { default: break; case ISD::TokenFactor: return visitTokenFactor(N); case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::SADDSAT: case ISD::UADDSAT: return visitADDSAT(N); case ISD::SSUBSAT: case ISD::USUBSAT: return visitSUBSAT(N); case ISD::ADDC: return visitADDC(N); case ISD::SADDO: case ISD::UADDO: return visitADDO(N); case ISD::SUBC: return visitSUBC(N); case ISD::SSUBO: case ISD::USUBO: return visitSUBO(N); case ISD::ADDE: return visitADDE(N); case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SADDO_CARRY: return visitSADDO_CARRY(N); case ISD::SUBE: return visitSUBE(N); case ISD::SUBCARRY: return visitSUBCARRY(N); case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N); case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: case ISD::UMULFIXSAT: return visitMULFIX(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); case ISD::SREM: case ISD::UREM: return visitREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); case ISD::AVGFLOORS: case ISD::AVGFLOORU: case ISD::AVGCEILS: case ISD::AVGCEILU: return visitAVG(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); case ISD::SMULO: case ISD::UMULO: return visitMULO(N); case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: return visitIMINMAX(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); case ISD::SHL: return visitSHL(N); case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); case ISD::ROTR: case ISD::ROTL: return visitRotate(N); case ISD::FSHL: case ISD::FSHR: return visitFunnelShift(N); case ISD::SSHLSAT: case ISD::USHLSAT: return visitSHLSAT(N); case ISD::ABS: return visitABS(N); case ISD::BSWAP: return visitBSWAP(N); case ISD::BITREVERSE: return visitBITREVERSE(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SETCCCARRY: return visitSETCCCARRY(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::AssertSext: case ISD::AssertZext: return visitAssertExt(N); case ISD::AssertAlign: return visitAssertAlign(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); case ISD::STRICT_FADD: return visitSTRICT_FADD(N); case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); case ISD::FPOW: return visitFPOW(N); case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); case 
ISD::UINT_TO_FP: return visitUINT_TO_FP(N); case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); case ISD::FP_ROUND: return visitFP_ROUND(N); case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); case ISD::FFLOOR: return visitFFLOOR(N); case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINIMUM: case ISD::FMAXIMUM: return visitFMinMax(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); case ISD::STORE: return visitSTORE(N); case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); case ISD::MGATHER: return visitMGATHER(N); case ISD::MLOAD: return visitMLOAD(N); case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); case ISD::LIFETIME_END: return visitLIFETIME_END(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); case ISD::FP_TO_BF16: return visitFP_TO_BF16(N); case ISD::FREEZE: return visitFREEZE(N); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: case ISD::VECREDUCE_OR: case ISD::VECREDUCE_XOR: case ISD::VECREDUCE_SMAX: case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N); #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC: #include "llvm/IR/VPIntrinsics.def" return visitVPOp(N); } return SDValue(); } SDValue DAGCombiner::combine(SDNode *N) { SDValue RV; if (!DisableGenericCombines) RV = visit(N); // If nothing happened, try a target-specific DAG combine. if (!RV.getNode()) { assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!"); if (N->getOpcode() >= ISD::BUILTIN_OP_END || TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { // Expose the DAG combiner to the target combiner impls. TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); RV = TLI.PerformDAGCombine(N, DagCombineInfo); } } // If nothing happened still, try promoting the operation. if (!RV.getNode()) { switch (N->getOpcode()) { default: break; case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::AND: case ISD::OR: case ISD::XOR: RV = PromoteIntBinOp(SDValue(N, 0)); break; case ISD::SHL: case ISD::SRA: case ISD::SRL: RV = PromoteIntShiftOp(SDValue(N, 0)); break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: RV = PromoteExtend(SDValue(N, 0)); break; case ISD::LOAD: if (PromoteLoad(SDValue(N, 0))) RV = SDValue(N, 0); break; } } // If N is a commutative binary node, try to eliminate it if the commuted // version is already present in the DAG. if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Constant operands are canonicalized to RHS. 
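  // Illustrative example: if (add x, y) already exists in the DAG, a later
  // (add y, x) can be CSE'd onto it via the commuted-operand lookup below.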
if (N0 != N1 && (isa(N0) || !isa(N1))) { SDValue Ops[] = {N1, N0}; SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, N->getFlags()); if (CSENode) return SDValue(CSENode, 0); } } return RV; } /// Given a node, return its input chain if it has one, otherwise return a null /// sd operand. static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) return N->getOperand(0); if (N->getOperand(NumOps-1).getValueType() == MVT::Other) return N->getOperand(NumOps-1); for (unsigned i = 1; i < NumOps-1; ++i) if (N->getOperand(i).getValueType() == MVT::Other) return N->getOperand(i); } return SDValue(); } SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // If N has two operands, where one has an input chain equal to the other, // the 'other' chain is redundant. if (N->getNumOperands() == 2) { if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) return N->getOperand(0); if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) return N->getOperand(1); } // Don't simplify token factors if optnone. if (OptLevel == CodeGenOpt::None) return SDValue(); // Don't simplify the token factor if the node itself has too many operands. if (N->getNumOperands() > TokenFactorInlineLimit) return SDValue(); // If the sole user is a token factor, we should make sure we have a // chance to merge them together. This prevents TF chains from inhibiting // optimizations. if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor) AddToWorklist(*(N->use_begin())); SmallVector TFs; // List of token factors to visit. SmallVector Ops; // Ops for replacing token factor. SmallPtrSet SeenOps; bool Changed = false; // If we should replace this token factor. // Start out with this token factor. TFs.push_back(N); // Iterate through token factors. The TFs grows when new token factors are // encountered. for (unsigned i = 0; i < TFs.size(); ++i) { // Limit number of nodes to inline, to avoid quadratic compile times. // We have to add the outstanding Token Factors to Ops, otherwise we might // drop Ops from the resulting Token Factors. if (Ops.size() > TokenFactorInlineLimit) { for (unsigned j = i; j < TFs.size(); j++) Ops.emplace_back(TFs[j], 0); // Drop unprocessed Token Factors from TFs, so we do not add them to the // combiner worklist later. TFs.resize(i); break; } SDNode *TF = TFs[i]; // Check each of the operands. for (const SDValue &Op : TF->op_values()) { switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. They are // redundant. Changed = true; break; case ISD::TokenFactor: if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { // Queue up for processing. TFs.push_back(Op.getNode()); Changed = true; break; } LLVM_FALLTHROUGH; default: // Only add if it isn't already in the list. if (SeenOps.insert(Op.getNode()).second) Ops.push_back(Op); else Changed = true; break; } } } // Re-visit inlined Token Factors, to clean them up in case they have been // removed. Skip the first Token Factor, as this is the current node. for (unsigned i = 1, e = TFs.size(); i < e; i++) AddToWorklist(TFs[i]); // Remove Nodes that are chained to another node in the list. Do so // by walking up chains breath-first stopping when we've seen // another operand. In general we must climb to the EntryNode, but we can exit // early if we find all remaining work is associated with just one operand as // no further pruning is possible. 
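  // Illustrative example: if operand A of this TokenFactor is reachable by
  // walking the chain operands of operand B, then A is already ordered before
  // B and can be dropped from the operand list.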
// List of nodes to search through and original Ops from which they originate. SmallVector, 8> Worklist; SmallVector OpWorkCount; // Count of work for each Op. SmallPtrSet SeenChains; bool DidPruneOps = false; unsigned NumLeftToConsider = 0; for (const SDValue &Op : Ops) { Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++)); OpWorkCount.push_back(1); } auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) { // If this is an Op, we can remove the op from the list. Remark any // search associated with it as from the current OpNumber. if (SeenOps.contains(Op)) { Changed = true; DidPruneOps = true; unsigned OrigOpNumber = 0; while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op) OrigOpNumber++; assert((OrigOpNumber != Ops.size()) && "expected to find TokenFactor Operand"); // Re-mark worklist from OrigOpNumber to OpNumber for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) { if (Worklist[i].second == OrigOpNumber) { Worklist[i].second = OpNumber; } } OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber]; OpWorkCount[OrigOpNumber] = 0; NumLeftToConsider--; } // Add if it's a new chain if (SeenChains.insert(Op).second) { OpWorkCount[OpNumber]++; Worklist.push_back(std::make_pair(Op, OpNumber)); } }; for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) { // We need at least be consider at least 2 Ops to prune. if (NumLeftToConsider <= 1) break; auto CurNode = Worklist[i].first; auto CurOpNumber = Worklist[i].second; assert((OpWorkCount[CurOpNumber] > 0) && "Node should not appear in worklist"); switch (CurNode->getOpcode()) { case ISD::EntryToken: // Hitting EntryToken is the only way for the search to terminate without // hitting // another operand's search. Prevent us from marking this operand // considered. NumLeftToConsider++; break; case ISD::TokenFactor: for (const SDValue &Op : CurNode->op_values()) AddToWorklist(i, Op.getNode(), CurOpNumber); break; case ISD::LIFETIME_START: case ISD::LIFETIME_END: case ISD::CopyFromReg: case ISD::CopyToReg: AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber); break; default: if (auto *MemNode = dyn_cast(CurNode)) AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber); break; } OpWorkCount[CurOpNumber]--; if (OpWorkCount[CurOpNumber] == 0) NumLeftToConsider--; } // If we've changed things around then replace token factor. if (Changed) { SDValue Result; if (Ops.empty()) { // The entry token is the only possible outcome. Result = DAG.getEntryNode(); } else { if (DidPruneOps) { SmallVector PrunedOps; // for (const SDValue &Op : Ops) { if (SeenChains.count(Op.getNode()) == 0) PrunedOps.push_back(Op); } Result = DAG.getTokenFactor(SDLoc(N), PrunedOps); } else { Result = DAG.getTokenFactor(SDLoc(N), Ops); } } return Result; } return SDValue(); } /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { WorklistRemover DeadNodes(*this); // Replacing results may cause a different MERGE_VALUES to suddenly // be CSE'd with N, and carry its uses with it. Iterate until no // uses remain, to ensure that the node can be safely deleted. // First add the users of this node to the work list so that they // can be tried again once they have new operands. AddUsersToWorklist(N); do { // Do as a single replacement to avoid rewalking use lists. 
SmallVector Ops; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) Ops.push_back(N->getOperand(i)); DAG.ReplaceAllUsesWith(N, Ops.data()); } while (!N->use_empty()); deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a /// ConstantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { ConstantSDNode *Const = dyn_cast(N); return Const != nullptr && !Const->isOpaque() ? Const : nullptr; } /// Return true if 'Use' is a load or a store that uses N as its base pointer /// and that N may be folded in the load / store addressing mode. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT; unsigned AS; if (LoadSDNode *LD = dyn_cast(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); } else if (StoreSDNode *ST = dyn_cast(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); AS = ST->getAddressSpace(); } else if (MaskedLoadSDNode *LD = dyn_cast(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); } else if (MaskedStoreSDNode *ST = dyn_cast(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); AS = ST->getAddressSpace(); } else { return false; } TargetLowering::AddrMode AM; if (N->getOpcode() == ISD::ADD) { AM.HasBaseReg = true; ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); if (Offset) // [reg +/- imm] AM.BaseOffs = Offset->getSExtValue(); else // [reg +/- reg] AM.Scale = 1; } else if (N->getOpcode() == ISD::SUB) { AM.HasBaseReg = true; ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); if (Offset) // [reg +/- imm] AM.BaseOffs = -Offset->getSExtValue(); else // [reg +/- reg] AM.Scale = 1; } else { return false; } return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, VT.getTypeForEVT(*DAG.getContext()), AS); } /// This inverts a canonicalization in IR that replaces a variable select arm /// with an identity constant. Codegen improves if we re-use the variable /// operand rather than load a constant. This can also be converted into a /// masked vector operation if the target supports it. static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands) { // Match a select as operand 1. The identity constant that we are looking for // is only valid as operand 1 of a non-commutative binop. SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (ShouldCommuteOperands) std::swap(N0, N1); // TODO: Should this apply to scalar select too? if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT) return SDValue(); unsigned Opcode = N->getOpcode(); EVT VT = N->getValueType(0); SDValue Cond = N1.getOperand(0); SDValue TVal = N1.getOperand(1); SDValue FVal = N1.getOperand(2); // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity(). // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()". // TODO: With fast-math (NSZ), allow the opposite-sign form of zero? 
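  // (Quick compile-time spot check of the integer identities listed below,
  //  using an arbitrary sample value; the FP cases (X + -0.0, X * 1.0, ...)
  //  hold as well for non-NaN inputs but are not checked here.)
  static_assert(123u + 0u == 123u && 123u - 0u == 123u &&
                    (123u << 0) == 123u && 123u * 1u == 123u,
                "identity constants must leave the other operand unchanged");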
auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) { if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) { switch (Opcode) { case ISD::FADD: // X + -0.0 --> X return C->isZero() && C->isNegative(); case ISD::FSUB: // X - 0.0 --> X return C->isZero() && !C->isNegative(); case ISD::FMUL: // X * 1.0 --> X case ISD::FDIV: // X / 1.0 --> X return C->isExactlyValue(1.0); } } if (ConstantSDNode *C = isConstOrConstSplat(V)) { switch (Opcode) { case ISD::ADD: // X + 0 --> X case ISD::SUB: // X - 0 --> X case ISD::SHL: // X << 0 --> X case ISD::SRA: // X s>> 0 --> X case ISD::SRL: // X u>> 0 --> X return C->isZero(); case ISD::MUL: // X * 1 --> X return C->isOne(); } } return false; }; // This transform increases uses of N0, so freeze it to be safe. // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal) if (isIdentityConstantForOpcode(Opcode, TVal)) { SDValue F0 = DAG.getFreeze(N0); SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags()); return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO); } // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0 if (isIdentityConstantForOpcode(Opcode, FVal)) { SDValue F0 = DAG.getFreeze(N0); SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags()); return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0); } return SDValue(); } SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && "Unexpected binary operator"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto BinOpcode = BO->getOpcode(); EVT VT = BO->getValueType(0); if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) { if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false)) return Sel; if (TLI.isCommutativeBinOp(BO->getOpcode())) if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true)) return Sel; } // Don't do this unless the old select is going away. We want to eliminate the // binary operator, not replace a binop with a select. // TODO: Handle ISD::SELECT_CC. unsigned SelOpNo = 0; SDValue Sel = BO->getOperand(0); if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { SelOpNo = 1; Sel = BO->getOperand(1); } if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) return SDValue(); SDValue CT = Sel.getOperand(1); if (!isConstantOrConstantVector(CT, true) && !DAG.isConstantFPBuildVectorOrConstantFP(CT)) return SDValue(); SDValue CF = Sel.getOperand(2); if (!isConstantOrConstantVector(CF, true) && !DAG.isConstantFPBuildVectorOrConstantFP(CF)) return SDValue(); // Bail out if any constants are opaque because we can't constant fold those. // The exception is "and" and "or" with either 0 or -1 in which case we can // propagate non constant operands into select. I.e.: // and (select Cond, 0, -1), X --> select Cond, 0, X // or X, (select Cond, -1, 0) --> select Cond, -1, X bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) && (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF)); SDValue CBO = BO->getOperand(SelOpNo ^ 1); if (!CanFoldNonConst && !isConstantOrConstantVector(CBO, true) && !DAG.isConstantFPBuildVectorOrConstantFP(CBO)) return SDValue(); // We have a select-of-constants followed by a binary operator with a // constant. Eliminate the binop by pulling the constant math into the select. // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO SDLoc DL(Sel); SDValue NewCT = SelOpNo ? 
DAG.getNode(BinOpcode, DL, VT, CBO, CT) : DAG.getNode(BinOpcode, DL, VT, CT, CBO); if (!CanFoldNonConst && !NewCT.isUndef() && !isConstantOrConstantVector(NewCT, true) && !DAG.isConstantFPBuildVectorOrConstantFP(NewCT)) return SDValue(); SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) : DAG.getNode(BinOpcode, DL, VT, CF, CBO); if (!CanFoldNonConst && !NewCF.isUndef() && !isConstantOrConstantVector(NewCF, true) && !DAG.isConstantFPBuildVectorOrConstantFP(NewCF)) return SDValue(); SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); SelectOp->setFlags(BO->getFlags()); return SelectOp; } static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Expecting add or sub"); // Match a constant operand and a zext operand for the math instruction: // add Z, C // sub C, Z bool IsAdd = N->getOpcode() == ISD::ADD; SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0); SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1); auto *CN = dyn_cast(C); if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND) return SDValue(); // Match the zext operand as a setcc of a boolean. if (Z.getOperand(0).getOpcode() != ISD::SETCC || Z.getOperand(0).getValueType() != MVT::i1) return SDValue(); // Match the compare as: setcc (X & 1), 0, eq. SDValue SetCC = Z.getOperand(0); ISD::CondCode CC = cast(SetCC->getOperand(2))->get(); if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) || SetCC.getOperand(0).getOpcode() != ISD::AND || !isOneConstant(SetCC.getOperand(0).getOperand(1))) return SDValue(); // We are adding/subtracting a constant and an inverted low bit. Turn that // into a subtract/add of the low bit with incremented/decremented constant: // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1)) // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1)) EVT VT = C.getValueType(); SDLoc DL(N); SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT); SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT); return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit); } /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into /// a shift and add with a different constant. static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Expecting add or sub"); // We need a constant operand for the add/sub, and the other operand is a // logical shift right: add (srl), C or sub C, (srl). bool IsAdd = N->getOpcode() == ISD::ADD; SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) || ShiftOp.getOpcode() != ISD::SRL) return SDValue(); // The shift must be of a 'not' value. SDValue Not = ShiftOp.getOperand(0); if (!Not.hasOneUse() || !isBitwiseNot(Not)) return SDValue(); // The shift must be moving the sign bit to the least-significant-bit. 
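  // (Worked example with illustrative 32-bit values and C = 7 for the add
  //  form:
  //    X >= 0: (not X) >>u 31 == 1, so the original computes 7 + 1 == 8, and
  //            the rewrite gives (X >>s 31) + (7 + 1) == 0 + 8 == 8;
  //    X <  0: (not X) >>u 31 == 0, so the original computes 7, and the
  //            rewrite gives (-1) + 8 == 7.)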
EVT VT = ShiftOp.getValueType(); SDValue ShAmt = ShiftOp.getOperand(1); ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1)) return SDValue(); // Eliminate the 'not' by adjusting the shift and add/sub constant: // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) SDLoc DL(N); if (SDValue NewC = DAG.FoldConstantArithmetic( IsAdd ? ISD::ADD : ISD::SUB, DL, VT, {ConstantOp, DAG.getConstant(1, DL, VT)})) { SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT, Not.getOperand(0), ShAmt); return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC); } return SDValue(); } static bool isADDLike(SDValue V, const SelectionDAG &DAG) { unsigned Opcode = V.getOpcode(); if (Opcode == ISD::OR) return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)); if (Opcode == ISD::XOR) return isMinSignedConstant(V.getOperand(1)); return false; } /// Try to fold a node that behaves like an ADD (note that N isn't necessarily /// an ISD::ADD here, it could for example be an ISD::OR if we know that there /// are no common bits set in the operands). SDValue DAGCombiner::visitADDLike(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // fold (add x, undef) -> undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; // fold (add c1, c2) -> c1+c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, DL, VT, N1, N0); // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (add x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; } // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; if (N0.getOpcode() == ISD::SUB) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); // fold ((A-c1)+c2) -> (A+(c2-c1)) if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01})) return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub); // fold ((c1-A)+c2) -> (c1+c2)-A if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00})) return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); } // add (sext i1 X), 1 -> zext (not i1 X) // We don't transform this pattern: // add (zext i1 X), -1 -> sext (not i1 X) // because most (?) targets generate better code for the zext form. if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && isOneOrOneSplat(N1)) { SDValue X = N0.getOperand(0); if ((!LegalOperations || (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && X.getScalarValueSizeInBits() == 1) { SDValue Not = DAG.getNOT(DL, X, X.getValueType()); return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } } // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) // iff (or x, c0) is equivalent to (add x, c0). // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1)) // iff (xor x, c0) is equivalent to (add x, c0). 
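  // (Concrete spot checks with arbitrary bit patterns: disjoint set bits make
  //  OR behave like ADD, and XOR with the minimum signed value matches ADD
  //  because only the sign bit is touched, possibly with a wrap.)
  static_assert((0b0101u | 0b1010u) == (0b0101u + 0b1010u),
                "or == add when the operands share no set bits");
  static_assert((5u ^ 0x80000000u) == (5u + 0x80000000u),
                "xor with the min signed value == add for this sample");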
if (isADDLike(N0, DAG)) { SDValue N01 = N0.getOperand(1); if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01})) return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add); } if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // reassociate add if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) { if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) return RADD; // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is // equivalent to (add x, c). // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is // equivalent to (add x, c). auto ReassociateAddOr = [&](SDValue N0, SDValue N1) { if (isADDLike(N0, DAG) && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { return DAG.getNode(ISD::ADD, DL, VT, DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), N0.getOperand(1)); } return SDValue(); }; if (SDValue Add = ReassociateAddOr(N0, N1)) return Add; if (SDValue Add = ReassociateAddOr(N1, N0)) return Add; } // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); // fold ((B-A)+A) -> B if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) return N0.getOperand(0); // fold ((A-B)+(C-A)) -> (C-B) if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && N0.getOperand(0) == N1.getOperand(1)) return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N0.getOperand(1)); // fold ((A-B)+(B-C)) -> (A-C) if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && N0.getOperand(1) == N1.getOperand(0)) return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1.getOperand(1)); // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(0)); // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && N0->hasOneUse() && N1->hasOneUse()) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); SDValue N10 = N1.getOperand(0); SDValue N11 = N1.getOperand(1); if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10)) return DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } // fold (add (umax X, C), -C) --> (usubsat X, C) if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) { auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) { return (!Max && !Op) || (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue())); }; if 
(ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT, /*AllowUndefs*/ true)) return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), N0.getOperand(1)); } if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); if (isOneOrOneSplat(N1)) { // fold (add (xor a, -1), 1) -> (sub 0, a) if (isBitwiseNot(N0)) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0.getOperand(0)); // fold (add (add (xor a, -1), b), 1) -> (sub b, a) if (N0.getOpcode() == ISD::ADD) { SDValue A, Xor; if (isBitwiseNot(N0.getOperand(0))) { A = N0.getOperand(1); Xor = N0.getOperand(0); } else if (isBitwiseNot(N0.getOperand(1))) { A = N0.getOperand(0); Xor = N0.getOperand(1); } if (Xor) return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0)); } // Look for: // add (add x, y), 1 // And if the target does not like this form then turn into: // sub y, (xor x, -1) if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD && N0.hasOneUse()) { SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not); } } // (x - y) + -1 -> add (xor y, -1), x if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isAllOnesOrAllOnesSplat(N1)) { SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1); return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); } if (SDValue Combined = visitADDLikeCommutative(N0, N1, N)) return Combined; if (SDValue Combined = visitADDLikeCommutative(N1, N0, N)) return Combined; return SDValue(); } SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); if (SDValue Combined = visitADDLike(N)) return Combined; if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) return V; if (SDValue V = foldAddSubOfSignBit(N, DAG)) return V; // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)). 
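  // (This is plain distributivity; with a stand-in vscale of 4, C0 = 3 and
  //  C1 = 5, both sides agree.)
  static_assert(4u * 3u + 4u * 5u == 4u * (3u + 5u),
                "adding two vscale multiples folds by distributivity");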
if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) { const APInt &C0 = N0->getConstantOperandAPInt(0); const APInt &C1 = N1->getConstantOperandAPInt(0); return DAG.getVScale(DL, VT, C0 + C1); } // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2) if ((N0.getOpcode() == ISD::ADD) && (N0.getOperand(1).getOpcode() == ISD::VSCALE) && (N1.getOpcode() == ISD::VSCALE)) { const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0); const APInt &VS1 = N1->getConstantOperandAPInt(0); SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1); return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS); } // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2)) if (N0.getOpcode() == ISD::STEP_VECTOR && N1.getOpcode() == ISD::STEP_VECTOR) { const APInt &C0 = N0->getConstantOperandAPInt(0); const APInt &C1 = N1->getConstantOperandAPInt(0); APInt NewStep = C0 + C1; return DAG.getStepVector(DL, VT, NewStep); } // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2) if ((N0.getOpcode() == ISD::ADD) && (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) && (N1.getOpcode() == ISD::STEP_VECTOR)) { const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0); const APInt &SV1 = N1->getConstantOperandAPInt(0); APInt NewStep = SV0 + SV1; SDValue SV = DAG.getStepVector(DL, VT, NewStep); return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV); } return SDValue(); } SDValue DAGCombiner::visitADDSAT(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // fold (add_sat x, undef) -> -1 if (N0.isUndef() || N1.isUndef()) return DAG.getAllOnesConstant(DL, VT); // fold (add_sat c1, c2) -> c3 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(Opcode, DL, VT, N1, N0); // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (add_sat x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; } // fold (add_sat x, 0) -> x if (isNullConstant(N1)) return N0; // If it cannot overflow, transform into an add. if (Opcode == ISD::UADDSAT) if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) return DAG.getNode(ISD::ADD, DL, VT, N0, N1); return SDValue(); } static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { bool Masked = false; // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization. while (true) { if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) { V = V.getOperand(0); continue; } if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) { Masked = true; V = V.getOperand(0); continue; } break; } // If this is not a carry, return. if (V.getResNo() != 1) return SDValue(); if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY && V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) return SDValue(); EVT VT = V->getValueType(0); if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT)) return SDValue(); // If the result is masked, then no matter what kind of bool it is we can // return. If it isn't, then we need to make sure the bool type is either 0 or // 1 and not other values. 
if (Masked || TLI.getBooleanContents(V.getValueType()) == TargetLoweringBase::ZeroOrOneBooleanContent) return V; return SDValue(); } /// Given the operands of an add/sub operation, see if the 2nd operand is a /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert /// the opcode and bypass the mask operation. static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL) { if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1))) return SDValue(); EVT VT = N0.getValueType(); if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits()) return SDValue(); // add N0, (and (AssertSext X, i1), 1) --> sub N0, X // sub N0, (and (AssertSext X, i1), 1) --> add N0, X return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0)); } /// Helper for doing combines based on N0 and N1 being added to each other. SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0).getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N0, DAG.getNode(ISD::SHL, DL, VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL)) return V; // Look for: // add (add x, 1), y // And if the target does not like this form then turn into: // sub y, (xor x, -1) if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) { SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N1, Not); } if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) { // Hoist one-use subtraction by non-opaque constant: // (x - C) + y -> (x + y) - C // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); } // Hoist one-use subtraction from non-opaque constant: // (C - x) + y -> (y - x) + C if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0)); } } // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1' // rather than 'add 0/-1' (the zext should get folded). 
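  // (For an i1 value Y, sext(Y) is 0 or -1 while zext(Y) is 0 or 1, so
  //  X + sext(Y) and X - zext(Y) compute the same result; e.g. with X = 10
  //  and Y = 1, both give 9. Illustrative values only.)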
// add (sext i1 Y), X --> sub X, (zext i1 Y) if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getScalarValueSizeInBits() == 1 && TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) { SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } // add X, (sextinreg Y i1) -> sub X, (and Y 1) if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); } } // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) && N1.getResNo() == 0) return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), N0, N1.getOperand(0), N1.getOperand(2)); // (add X, Carry) -> (addcarry X, 0, Carry) if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) if (SDValue Carry = getAsCarry(TLI, N1)) return DAG.getNode(ISD::ADDCARRY, DL, DAG.getVTList(VT, Carry.getValueType()), N0, DAG.getConstant(0, DL, VT), Carry); return SDValue(); } SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // If the flag result is dead, turn this into an ADD. if (!N->hasAnyUseOfValue(1)) return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // canonicalize constant to RHS. ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // If it cannot overflow, transform into an add. if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); return SDValue(); } /** * Flips a boolean if it is cheaper to compute. If the Force parameters is set, * then the flip also occurs if computing the inverse is the same cost. * This function returns an empty SDValue in case it cannot flip the boolean * without increasing the cost of the computation. If you want to flip a boolean * no matter what, use DAG.getLogicalNOT. */ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force) { if (Force && isa(V)) return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType()); if (V.getOpcode() != ISD::XOR) return SDValue(); ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false); if (!Const) return SDValue(); EVT VT = V.getValueType(); bool IsFlip = false; switch(TLI.getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: IsFlip = Const->isOne(); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: IsFlip = Const->isAllOnes(); break; case TargetLowering::UndefinedBooleanContent: IsFlip = (Const->getAPIntValue() & 0x01) == 1; break; } if (IsFlip) return V.getOperand(0); if (Force) return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType()); return SDValue(); } SDValue DAGCombiner::visitADDO(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); bool IsSigned = (ISD::SADDO == N->getOpcode()); EVT CarryVT = N->getValueType(1); SDLoc DL(N); // If the flag result is dead, turn this into an ADD. 
if (!N->hasAnyUseOfValue(1)) return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getUNDEF(CarryVT)); // canonicalize constant to RHS. if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0); // fold (addo x, 0) -> x + no carry out if (isNullOrNullSplat(N1)) return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); if (!IsSigned) { // If it cannot overflow, transform into an add. if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) { SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), DAG.getConstant(0, DL, VT), N0.getOperand(0)); return CombineTo( N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1))); } if (SDValue Combined = visitUADDOLike(N0, N1, N)) return Combined; if (SDValue Combined = visitUADDOLike(N1, N0, N)) return Combined; } return SDValue(); } SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { EVT VT = N0.getValueType(); if (VT.isVector()) return SDValue(); // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) // If Y + 1 cannot overflow. if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) { SDValue Y = N1.getOperand(0); SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType()); if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never) return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y, N1.getOperand(2)); } // (uaddo X, Carry) -> (addcarry X, 0, Carry) if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) if (SDValue Carry = getAsCarry(TLI, N1)) return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, DAG.getConstant(0, SDLoc(N), VT), Carry); return SDValue(); } SDValue DAGCombiner::visitADDE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); // canonicalize constant to RHS ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } SDValue DAGCombiner::visitADDCARRY(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); SDLoc DL(N); // canonicalize constant to RHS ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn); // fold (addcarry x, y, false) -> (uaddo x, y) if (isNullConstant(CarryIn)) { if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0))) return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); } // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry. 
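  // (With both addends zero, the sum is just the incoming carry bit, so the
  //  result is X extended or truncated to VT and masked down to bit 0; the
  //  outgoing carry is always 0 because the sum can be at most 1.)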
if (isNullConstant(N0) && isNullConstant(N1)) { EVT VT = N0.getValueType(); EVT CarryVT = CarryIn.getValueType(); SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT); AddToWorklist(CarryExt.getNode()); return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt, DAG.getConstant(1, DL, VT)), DAG.getConstant(0, DL, CarryVT)); } if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) return Combined; if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N)) return Combined; return SDValue(); } SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); SDLoc DL(N); // canonicalize constant to RHS ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn); // fold (saddo_carry x, y, false) -> (saddo x, y) if (isNullConstant(CarryIn)) { if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0))) return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1); } return SDValue(); } /** * If we are facing some sort of diamond carry propapagtion pattern try to * break it up to generate something like: * (addcarry X, 0, (addcarry A, B, Z):Carry) * * The end result is usually an increase in operation required, but because the * carry is now linearized, other tranforms can kick in and optimize the DAG. * * Patterns typically look something like * (uaddo A, B) * / \ * Carry Sum * | \ * | (addcarry *, 0, Z) * | / * \ Carry * | / * (addcarry X, *, *) * * But numerous variation exist. Our goal is to identify A, B, X and Z and * produce a combine with a single path for carry propagation. */ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N) { if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1) return SDValue(); if (Carry1.getOpcode() != ISD::UADDO) return SDValue(); SDValue Z; /** * First look for a suitable Z. It will present itself in the form of * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true */ if (Carry0.getOpcode() == ISD::ADDCARRY && isNullConstant(Carry0.getOperand(1))) { Z = Carry0.getOperand(2); } else if (Carry0.getOpcode() == ISD::UADDO && isOneConstant(Carry0.getOperand(1))) { EVT VT = Combiner.getSetCCResultType(Carry0.getValueType()); Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT); } else { // We couldn't find a suitable Z. 
return SDValue(); } auto cancelDiamond = [&](SDValue A,SDValue B) { SDLoc DL(N); SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z); Combiner.AddToWorklist(NewY.getNode()); return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X, DAG.getConstant(0, DL, X.getValueType()), NewY.getValue(1)); }; /** * (uaddo A, B) * | * Sum * | * (addcarry *, 0, Z) */ if (Carry0.getOperand(0) == Carry1.getValue(0)) { return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1)); } /** * (addcarry A, 0, Z) * | * Sum * | * (uaddo *, B) */ if (Carry1.getOperand(0) == Carry0.getValue(0)) { return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1)); } if (Carry1.getOperand(1) == Carry0.getValue(0)) { return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0)); } return SDValue(); } // If we are facing some sort of diamond carry/borrow in/out pattern try to // match patterns like: // // (uaddo A, B) CarryIn // | \ | // | \ | // PartialSum PartialCarryOutX / // | | / // | ____|____________/ // | / | // (uaddo *, *) \________ // | \ \ // | \ | // | PartialCarryOutY | // | \ | // | \ / // AddCarrySum | ______/ // | / // CarryOut = (or *, *) // // And generate ADDCARRY (or SUBCARRY) with two result values: // // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn) // // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with // a single path for carry/borrow out propagation: static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N) { SDValue Carry0 = getAsCarry(TLI, N0); if (!Carry0) return SDValue(); SDValue Carry1 = getAsCarry(TLI, N1); if (!Carry1) return SDValue(); unsigned Opcode = Carry0.getOpcode(); if (Opcode != Carry1.getOpcode()) return SDValue(); if (Opcode != ISD::UADDO && Opcode != ISD::USUBO) return SDValue(); // Canonicalize the add/sub of A and B (the top node in the above ASCII art) // as Carry0 and the add/sub of the carry in as Carry1 (the middle node). if (Carry1.getNode()->isOperandOf(Carry0.getNode())) std::swap(Carry0, Carry1); // Check if nodes are connected in expected way. if (Carry1.getOperand(0) != Carry0.getValue(0) && Carry1.getOperand(1) != Carry0.getValue(0)) return SDValue(); // The carry in value must be on the righthand side for subtraction. unsigned CarryInOperandNum = Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0; if (Opcode == ISD::USUBO && CarryInOperandNum != 1) return SDValue(); SDValue CarryIn = Carry1.getOperand(CarryInOperandNum); unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType())) return SDValue(); // Verify that the carry/borrow in is plausibly a carry/borrow bit. // TODO: make getAsCarry() aware of how partial carries are merged. if (CarryIn.getOpcode() != ISD::ZERO_EXTEND) return SDValue(); CarryIn = CarryIn.getOperand(0); if (CarryIn.getValueType() != MVT::i1) return SDValue(); SDLoc DL(N); SDValue Merged = DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0), Carry0.getOperand(1), CarryIn); // Please note that because we have proven that the result of the UADDO/USUBO // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can // therefore prove that if the first UADDO/USUBO overflows, the second // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the // maximum value. 
// // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow) // // This is important because it means that OR and XOR can be used to merge // carry flags; and that AND can return a constant zero. // // TODO: match other operations that can merge flags (ADD, etc) DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0)); if (N->getOpcode() == ISD::AND) return DAG.getConstant(0, DL, MVT::i1); return Merged.getValue(1); } SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry. if (isBitwiseNot(N0)) if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) { SDLoc DL(N); SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1, N0.getOperand(0), NotC); return CombineTo( N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1))); } // Iff the flag result is dead: // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo // or the dependency between the instructions. if ((N0.getOpcode() == ISD::ADD || (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 && N0.getValue(1) != CarryIn)) && isNullConstant(N1) && !N->hasAnyUseOfValue(1)) return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); /** * When one of the addcarry argument is itself a carry, we may be facing * a diamond carry propagation. In which case we try to transform the DAG * to ensure linear carry propagation if that is possible. */ if (auto Y = getAsCarry(TLI, N1)) { // Because both are carries, Y and Z can be swapped. if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N)) return R; if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N)) return R; } return SDValue(); } // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a // clamp/truncation if necessary. static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL) { assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() && "Illegal truncation"); if (DstVT == SrcVT) return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS); // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by // clamping RHS. APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(), DstVT.getScalarSizeInBits()); if (!DAG.MaskedValueIsZero(LHS, UpperBits)) return SDValue(); SDValue SatLimit = DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(), DstVT.getScalarSizeInBits()), DL, SrcVT); RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit); RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS); LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS); return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS); } // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to // usubsat(a,b), optionally as a truncated type. SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) { if (N->getOpcode() != ISD::SUB || !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT))) return SDValue(); EVT SubVT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); // Try to find umax(a,b) - b or a - umin(a,b) patterns // they may be converted to usubsat(a,b). 
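  // (Spot check of the umax form on a sample pair, written out against the
  //  usubsat semantics usubsat(a, b) == (a > b ? a - b : 0); values are
  //  arbitrary.)
  static_assert(((10u > 3u ? 10u : 3u) - 3u) == (10u > 3u ? 10u - 3u : 0u),
                "umax(a,b) - b == usubsat(a,b) when a > b");
  static_assert(((3u > 10u ? 3u : 10u) - 10u) == (3u > 10u ? 3u - 10u : 0u),
                "umax(a,b) - b == usubsat(a,b) when a <= b");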
if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) { SDValue MaxLHS = Op0.getOperand(0); SDValue MaxRHS = Op0.getOperand(1); if (MaxLHS == Op1) return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N)); if (MaxRHS == Op1) return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N)); } if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) { SDValue MinLHS = Op1.getOperand(0); SDValue MinRHS = Op1.getOperand(1); if (MinLHS == Op0) return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N)); if (MinRHS == Op0) return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N)); } // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit))) if (Op1.getOpcode() == ISD::TRUNCATE && Op1.getOperand(0).getOpcode() == ISD::UMIN && Op1.getOperand(0).hasOneUse()) { SDValue MinLHS = Op1.getOperand(0).getOperand(0); SDValue MinRHS = Op1.getOperand(0).getOperand(1); if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0) return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS, DAG, SDLoc(N)); if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0) return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS, DAG, SDLoc(N)); } return SDValue(); } // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations) { if (!VT.isVector()) return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return DAG.getConstant(0, DL, VT); return SDValue(); } SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); auto PeekThroughFreeze = [](SDValue N) { if (N->getOpcode() == ISD::FREEZE && N.hasOneUse()) return N->getOperand(0); return N; }; // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1)) return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); // fold (sub c1, c2) -> c3 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (sub x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; } if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); // fold (sub x, c) -> (add x, -c) if (N1C) { return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); } if (isNullOrNullSplat(N0)) { unsigned BitWidth = VT.getScalarSizeInBits(); // Right-shifting everything out but the sign bit followed by negation is // the same as flipping arithmetic/logical shift type without the negation: // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) { auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); } } // 0 - X --> 0 if the sub is NUW. 
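  // (An unsigned 0 - X wraps for every X != 0, so the no-unsigned-wrap flag
  //  implies X == 0 and the result is simply the zero in N0.)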
if (N->getFlags().hasNoUnsignedWrap()) return N0; if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { // N1 is either 0 or the minimum signed value. If the sub is NSW, then // N1 must be 0 because negating the minimum signed value is undefined. if (N->getFlags().hasNoSignedWrap()) return N0; // 0 - X --> X if X is 0 or the minimum signed value. return N1; } // Convert 0 - abs(x). if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::ABS, VT)) if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) return Result; // Fold neg(splat(neg(x)) -> splat(x) if (VT.isVector()) { SDValue N1S = DAG.getSplatValue(N1, true); if (N1S && N1S.getOpcode() == ISD::SUB && isNullConstant(N1S.getOperand(0))) { if (VT.isScalableVector()) return DAG.getSplatVector(VT, DL, N1S.getOperand(1)); return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1)); } } } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) if (isAllOnesOrAllOnesSplat(N0)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); // fold (A - (0-B)) -> A+B if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1)); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); // fold (A+B)-B -> A if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); // fold (A+C1)-C2 -> A+(C1-C2) if (N0.getOpcode() == ISD::ADD) { SDValue N01 = N0.getOperand(1); if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1})) return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); } // fold C2-(A+C1) -> (C2-C1)-A if (N1.getOpcode() == ISD::ADD) { SDValue N11 = N1.getOperand(1); if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11})) return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } // fold (A-C1)-C2 -> A-(C1+C2) if (N0.getOpcode() == ISD::SUB) { SDValue N01 = N0.getOperand(1); if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1})) return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); } // fold (c1-A)-c2 -> (c1-c2)-A if (N0.getOpcode() == ISD::SUB) { SDValue N00 = N0.getOperand(0); if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1})) return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); } // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(1)); // fold ((A+(C+B))-B) -> A+C if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // fold ((A-(B-C))-C) -> A-B if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // fold (A-(B-C)) -> A+(C-B) if (N1.getOpcode() == ISD::SUB && N1.hasOneUse()) return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), N1.getOperand(0))); // A - (A & B) -> A & (~B) if (N1.getOpcode() == ISD::AND) { SDValue A = N1.getOperand(0); SDValue B = N1.getOperand(1); if (A != N0) std::swap(A, B); 
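  // (Spot check of A - (A & B) == A & ~B with arbitrary masks A = 0b1100 and
  //  B = 0b1010: subtracting the shared bits is the same as clearing them.)
  static_assert(0b1100u - (0b1100u & 0b1010u) == (0b1100u & ~0b1010u),
                "subtracting the common bits equals masking them off");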
if (A == N0 && (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) { SDValue InvB = DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::AND, DL, VT, A, InvB); } } // fold (X - (-Y * Z)) -> (X + (Y * Z)) if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { if (N1.getOperand(0).getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0).getOperand(0))) { SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, N1.getOperand(0).getOperand(1), N1.getOperand(1)); return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); } if (N1.getOperand(1).getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(1).getOperand(0))) { SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); } } // If either operand of a sub is undef, the result is undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) return V; if (SDValue V = foldAddSubOfSignBit(N, DAG)) return V; if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N))) return V; if (SDValue V = foldSubToUSubSat(VT, N)) return V; // (x - y) - 1 -> add (xor y, -1), x if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) { SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); } // Look for: // sub y, (xor x, -1) // And if the target does not like this form then turn into: // add (add x, y), 1 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) { SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0)); return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT)); } // Hoist one-use addition by non-opaque constant: // (x + C) - y -> (x - y) + C if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD && isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); } // y - (x + C) -> (y - x) - C if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD && isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); } // (x - C) - y -> (x - y) - C // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1)); } // (C - x) - y -> C - (x + y) if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add); } // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1' // rather than 'sub 0/1' (the sext should get folded). 
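  // (This mirrors the earlier add fold: for an i1 Y, zext(Y) is 0 or 1 and
  //  sext(Y) is 0 or -1, so X - zext(Y) == X + sext(Y).)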
// sub X, (zext i1 Y) --> add X, (sext i1 Y) if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.getOperand(0).getScalarValueSizeInBits() == 1 && TLI.getBooleanContents(VT) == TargetLowering::ZeroOrNegativeOneBooleanContent) { SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0)); return DAG.getNode(ISD::ADD, DL, VT, N0, SExt); } // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); SDValue S0 = N1.getOperand(0); if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1)) return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); } } // If the relocation model supports it, consider symbol offsets. if (GlobalAddressSDNode *GA = dyn_cast(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 if (GlobalAddressSDNode *GB = dyn_cast(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), DL, VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); } } // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) if (N1.getOpcode() == ISD::VSCALE) { const APInt &IntVal = N1.getConstantOperandAPInt(0); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal)); } // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C)) if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) { APInt NewStep = -N1.getConstantOperandAPInt(0); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getStepVector(DL, VT, NewStep)); } // Prefer an add for more folding potential and possibly better codegen: // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { SDValue ShAmt = N1.getOperand(1); ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); if (ShAmtC && ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) { SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); } } // As with the previous fold, prefer add for more folding potential. 
// Subtracting SMIN/0 is the same as adding SMIN/0: // N0 - (X << BW-1) --> N0 + (X << BW-1) if (N1.getOpcode() == ISD::SHL) { ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1)); if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1) return DAG.getNode(ISD::ADD, DL, VT, N1, N0); } if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) if (SDValue Carry = getAsCarry(TLI, N0)) { SDValue X = N1; SDValue Zero = DAG.getConstant(0, DL, VT); SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X); return DAG.getNode(ISD::ADDCARRY, DL, DAG.getVTList(VT, Carry.getValueType()), NegX, Zero, Carry); } } // If there's no chance of borrowing from adjacent bits, then sub is xor: // sub C0, X --> xor X, C0 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) { if (!C0->isOpaque()) { const APInt &C0Val = C0->getAPIntValue(); const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero; if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); } } return SDValue(); } SDValue DAGCombiner::visitSUBSAT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // fold (sub_sat x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); // fold (sub_sat x, x) -> 0 if (N0 == N1) return DAG.getConstant(0, DL, VT); // fold (sub_sat c1, c2) -> c3 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (sub_sat x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; } // fold (sub_sat x, 0) -> x if (isNullConstant(N1)) return N0; return SDValue(); } SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, x) -> 0 + no borrow if (N0 == N1) return CombineTo(N, DAG.getConstant(0, DL, VT), DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow if (isAllOnesConstant(N0)) return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); return SDValue(); } SDValue DAGCombiner::visitSUBO(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); bool IsSigned = (ISD::SSUBO == N->getOpcode()); EVT CarryVT = N->getValueType(1); SDLoc DL(N); // If the flag result is dead, turn this into an SUB. 
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (subo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (subo x, c) -> (addo x, -c)
  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // fold (subo x, 0) -> x + no borrow
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}

SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
      return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}

// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
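// (Roughly, these nodes compute a fixed-point product: multiply the two
//  operands and shift right by the scale operand, i.e. the number of
//  fractional bits, with the SAT variants additionally saturating. A small
//  Q4 illustration with hand-picked values: 1.5 * 2.5 == 3.75, that is,
//  24/16 * 40/16 == 60/16.)
static_assert(((24u * 40u) >> 4) == 60u,
              "Q4 fixed-point multiply example: 1.5 * 2.5 == 3.75");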
SDValue DAGCombiner::visitMULFIX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue Scale = N->getOperand(2); EVT VT = N0.getValueType(); // fold (mulfix x, undef, scale) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // Canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale); // fold (mulfix x, 0, scale) -> 0 if (isNullConstant(N1)) return DAG.getConstant(0, SDLoc(N), VT); return SDValue(); } SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // fold (mul x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); // fold (mul c1, c2) -> c1*c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, DL, VT, N1, N0); bool N1IsConst = false; bool N1IsOpaqueConst = false; APInt ConstValue1; // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); assert((!N1IsConst || ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && "Splat APInt should be element width"); } else { N1IsConst = isa(N1); if (N1IsConst) { ConstValue1 = cast(N1)->getAPIntValue(); N1IsOpaqueConst = cast(N1)->isOpaque(); } } // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1.isZero()) return N1; // fold (mul x, 1) -> x if (N1IsConst && ConstValue1.isOne()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1) && (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { SDValue LogBase2 = BuildLogBase2(N1, DL); EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(Log2Val, DL, getShiftAmountTy(N0.getValueType())))); } // Try to transform: // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub. // mul x, (2^N + 1) --> add (shl x, N), x // mul x, (2^N - 1) --> sub (shl x, N), x // Examples: x * 33 --> (x << 5) + x // x * 15 --> (x << 4) - x // x * -33 --> -((x << 5) + x) // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub. 
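// Illustrative sketch, not part of DAGCombiner: the strength reductions
// "mul x, (1 << c) -> shl x, c" and "mul x, -(1 << c) -> sub 0, (shl x, c)"
// in two's-complement (wrapping) arithmetic. Plain C++, 32-bit width chosen
// for the example; the helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void mulByPowerOfTwoIsShift(uint32_t x) {
  const unsigned c = 5; // multiply by 32
  assert(x * 32u == (x << c));                         // mul x, (1 << c)
  assert(x * (uint32_t)-32 == (uint32_t)0 - (x << c)); // mul x, -(1 << c)
}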
// mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M)) // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M)) // Examples: x * 0x8800 --> (x << 15) + (x << 11) // x * 0xf800 --> (x << 16) - (x << 11) // x * -0x8800 --> -((x << 15) + (x << 11)) // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16) if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { // TODO: We could handle more general decomposition of any constant by // having the target set a limit on number of ops and making a // callback to determine that sequence (similar to sqrt expansion). unsigned MathOp = ISD::DELETED_NODE; APInt MulC = ConstValue1.abs(); // The constant `2` should be treated as (2^0 + 1). unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros(); MulC.lshrInPlace(TZeros); if ((MulC - 1).isPowerOf2()) MathOp = ISD::ADD; else if ((MulC + 1).isPowerOf2()) MathOp = ISD::SUB; if (MathOp != ISD::DELETED_NODE) { unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2(); ShAmt += TZeros; assert(ShAmt < VT.getScalarSizeInBits() && "multiply-by-constant generated out of bounds shift"); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT)); SDValue R = TZeros ? DAG.getNode(MathOp, DL, VT, Shl, DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(TZeros, DL, VT))) : DAG.getNode(MathOp, DL, VT, Shl, N0); if (ConstValue1.isNegative()) R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R); return R; } } // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N0.getOpcode() == ISD::SHL) { SDValue N01 = N0.getOperand(1); if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01})) return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { SDValue Sh, Y; // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && isConstantOrConstantVector(N1.getOperand(1)) && N1->hasOneUse()) { Sh = N1; Y = N0; } if (Sh.getNode()) { SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y); return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && N0.getOpcode() == ISD::ADD && DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) return DAG.getNode( ISD::ADD, DL, VT, DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). if (N0.getOpcode() == ISD::VSCALE) if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) { const APInt &C0 = N0.getConstantOperandAPInt(0); const APInt &C1 = NC1->getAPIntValue(); return DAG.getVScale(DL, VT, C0 * C1); } // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). APInt MulVal; if (N0.getOpcode() == ISD::STEP_VECTOR) if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) { const APInt &C0 = N0.getConstantOperandAPInt(0); APInt NewStep = C0 * MulVal; return DAG.getStepVector(DL, VT, NewStep); } // Fold ((mul x, 0/undef) -> 0, // (mul x, 1) -> x) -> x) // -> and(x, mask) // We can replace vectors with '0' and '1' factors with a clearing mask. 
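// Illustrative sketch, not part of DAGCombiner: decomposing a multiply by a
// 2^N +/- 2^M constant into two shifts plus an add/sub, matching the examples
// in the comment above. Wrapping unsigned arithmetic; the helper name is
// hypothetical.
#include <cassert>
#include <cstdint>

static void mulByShiftedSumOrDiff(uint32_t x) {
  assert(x * 0x8800u == ((x << 15) + (x << 11))); // 2^15 + 2^11
  assert(x * 0xf800u == ((x << 16) - (x << 11))); // 2^16 - 2^11
  // A negative constant negates the whole expression afterwards:
  assert(x * (uint32_t)-0x8800 == (uint32_t)0 - ((x << 15) + (x << 11)));
}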
if (VT.isFixedLengthVector()) { unsigned NumElts = VT.getVectorNumElements(); SmallBitVector ClearMask; ClearMask.reserve(NumElts); auto IsClearMask = [&ClearMask](ConstantSDNode *V) { if (!V || V->isZero()) { ClearMask.push_back(true); return true; } ClearMask.push_back(false); return V->isOne(); }; if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) && ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) { assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector"); EVT LegalSVT = N1.getOperand(0).getValueType(); SDValue Zero = DAG.getConstant(0, DL, LegalSVT); SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT); SmallVector Mask(NumElts, AllOnes); for (unsigned I = 0; I != NumElts; ++I) if (ClearMask[I]) Mask[I] = Zero; return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask)); } } // reassociate mul if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags())) return RMUL; // Simplify the operands using demanded-bits information. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } /// Return true if divmod libcall is available. static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; EVT NodeType = Node->getValueType(0); if (!NodeType.isSimple()) return false; switch (NodeType.getSimpleVT().SimpleTy) { default: return false; // No libcall for vector types. case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } return TLI.getLibcallName(LC) != nullptr; } /// Issue divrem if both quotient and remainder are needed. SDValue DAGCombiner::useDivRem(SDNode *Node) { if (Node->use_empty()) return SDValue(); // This is a dead node, leave it alone. unsigned Opcode = Node->getOpcode(); bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; // DivMod lib calls can still work on non-legal types if using lib-calls. EVT VT = Node->getValueType(0); if (VT.isVector() || !VT.isInteger()) return SDValue(); if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) return SDValue(); // If DIVREM is going to get expanded into a libcall, // but there is no libcall available, then don't combine. if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && !isDivRemLibcallAvailable(Node, isSigned, TLI)) return SDValue(); // If div is legal, it's better to do the normal expansion unsigned OtherOpcode = 0; if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; if (TLI.isOperationLegalOrCustom(Opcode, VT)) return SDValue(); } else { OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV; if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) return SDValue(); } SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue combined; for (SDNode *User : Op0->uses()) { if (User == Node || User->getOpcode() == ISD::DELETED_NODE || User->use_empty()) continue; // Convert the other matching node(s), too; // otherwise, the DIVREM may get target-legalized into something // target-specific that we won't be able to recognize. 
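// Illustrative sketch, not part of DAGCombiner: a vector multiply whose
// constant operand holds only 0s and 1s is the same as AND-ing with a
// per-lane 0 / all-ones clearing mask, which is what the fold above builds.
// Scalar loop over a 4-lane "vector"; the helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void mulByZeroOneVectorIsAnd() {
  uint32_t X[4]    = {7, 42, 0xdeadbeefu, 123};
  uint32_t C[4]    = {1, 0, 1, 0};       // only 0/1 factors
  uint32_t Mask[4] = {~0u, 0u, ~0u, 0u}; // derived clearing mask
  for (int i = 0; i != 4; ++i)
    assert(X[i] * C[i] == (X[i] & Mask[i]));
}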
unsigned UserOpc = User->getOpcode(); if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && User->getOperand(0) == Op0 && User->getOperand(1) == Op1) { if (!combined) { if (UserOpc == OtherOpcode) { SDVTList VTs = DAG.getVTList(VT, VT); combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); } else if (UserOpc == DivRemOpc) { combined = SDValue(User, 0); } else { assert(UserOpc == Opcode); continue; } } if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) CombineTo(User, combined); else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) CombineTo(User, combined.getValue(1)); } } return combined; } static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); unsigned Opc = N->getOpcode(); bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); ConstantSDNode *N1C = isConstOrConstSplat(N1); // X / undef -> undef // X % undef -> undef // X / 0 -> undef // X % 0 -> undef // NOTE: This includes vectors where any divisor element is zero/undef. if (DAG.isUndef(Opc, {N0, N1})) return DAG.getUNDEF(VT); // undef / X -> 0 // undef % X -> 0 if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // 0 / X -> 0 // 0 % X -> 0 ConstantSDNode *N0C = isConstOrConstSplat(N0); if (N0C && N0C->isZero()) return N0; // X / X -> 1 // X % X -> 0 if (N0 == N1) return DAG.getConstant(IsDiv ? 1 : 0, DL, VT); // X / 1 -> X // X % 1 -> 0 // If this is a boolean op (single-bit element type), we can't have // division-by-zero or remainder-by-zero, so assume the divisor is 1. // TODO: Similarly, if we're zero-extending a boolean divisor, then assume // it's a 1. if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) return IsDiv ? N0 : DAG.getConstant(0, DL, VT); return SDValue(); } SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); SDLoc DL(N); // fold (sdiv c1, c2) -> c1/c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (sdiv X, -1) -> 0-X ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N1C->isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) if (N1C && N1C->getAPIntValue().isMinSignedValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT)); if (SDValue V = simplifyDivRem(N, DAG)) return V; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); if (SDValue V = visitSDIVLike(N0, N1, N)) { // If the corresponding remainder node exists, update its users with // (Dividend - (Quotient * Divisor). if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(), { N0, N1 })) { SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1); SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(Mul.getNode()); AddToWorklist(Sub.getNode()); CombineTo(RemNode, Sub); } return V; } // sdiv, srem -> sdivrem // If the divisor is constant, then return DIVREM only if isIntDivCheap() is // true. 
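// Illustrative sketch, not part of DAGCombiner: once a quotient is available,
// the matching remainder is Dividend - Quotient * Divisor, which is exactly
// how the existing SREM/UREM node is updated below. The helper name is
// hypothetical; the guard excludes the two undefined cases.
#include <cassert>
#include <climits>

static void remainderFromQuotient(int Dividend, int Divisor) {
  assert(Divisor != 0 && !(Dividend == INT_MIN && Divisor == -1));
  int Quotient = Dividend / Divisor;
  assert(Dividend % Divisor == Dividend - Quotient * Divisor);
}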
Otherwise, we break the simplification logic in visitREM(). AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; return SDValue(); } static bool isDivisorPowerOfTwo(SDValue Divisor) { // Helper for determining whether a value is a power-2 constant scalar or a // vector of such elements. auto IsPowerOfTwo = [](ConstantSDNode *C) { if (C->isZero() || C->isOpaque()) return false; if (C->getAPIntValue().isPowerOf2()) return true; if (C->getAPIntValue().isNegatedPowerOf2()) return true; return false; }; return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo); } SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); unsigned BitWidth = VT.getScalarSizeInBits(); // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; // Create constants that are functions of the shift amount value. EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy); SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1); C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy); SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1); if (!isConstantOrConstantVector(Inexact)) return SDValue(); // Splat the sign bit into the register SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy)); AddToWorklist(Sign.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact); AddToWorklist(Srl.getNode()); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl); AddToWorklist(Add.getNode()); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1); AddToWorklist(Sra.getNode()); // Special case: (sdiv X, 1) -> X // Special Case: (sdiv X, -1) -> 0-X SDValue One = DAG.getConstant(1, DL, VT); SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ); SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ); SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes); Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra); // If dividing by a positive value, we're done. Otherwise, the result must // be negated. SDValue Zero = DAG.getConstant(0, DL, VT); SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra); // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding. SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT); SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra); return Res; } // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. Targets may check function attributes for size/speed // trade-offs. 
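// Illustrative sketch, not part of DAGCombiner: signed division by 2^k via the
// shift sequence built above -- splat the sign bit, logically shift it right
// by BW - k to form the rounding bias, add, then arithmetic shift right by k.
// Assumes 32-bit int with arithmetic right shift of negative values (what the
// SRA node guarantees); the helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void sdivByPow2ViaShifts(int32_t x) {
  const unsigned k = 4;                       // divide by 16
  int32_t Sign = x >> 31;                     // 0 or -1
  uint32_t Bias = (uint32_t)Sign >> (32 - k); // 0 or 2^k - 1
  int32_t Quot = (int32_t)(x + (int32_t)Bias) >> k;
  assert(Quot == x / 16);                     // rounds toward zero
}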
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (isConstantOrConstantVector(N1) && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildSDIV(N)) return Op; return SDValue(); } SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); SDLoc DL(N); // fold (udiv c1, c2) -> c1/c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (udiv X, -1) -> select(X == -1, 1, 0) ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT)); if (SDValue V = simplifyDivRem(N, DAG)) return V; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; if (SDValue V = visitUDIVLike(N0, N1, N)) { // If the corresponding remainder node exists, update its users with // (Dividend - (Quotient * Divisor). if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(), { N0, N1 })) { SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1); SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(Mul.getNode()); AddToWorklist(Sub.getNode()); CombineTo(RemNode, Sub); } return V; } // sdiv, srem -> sdivrem // If the divisor is constant, then return DIVREM only if isIntDivCheap() is // true. Otherwise, we break the simplification logic in visitREM(). AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; return SDValue(); } SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); // fold (udiv x, (1 << c)) -> x >>u c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1)) { SDValue LogBase2 = BuildLogBase2(N1, DL); AddToWorklist(LogBase2.getNode()); EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); AddToWorklist(Trunc.getNode()); return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { SDValue N10 = N1.getOperand(0); if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N10)) { SDValue LogBase2 = BuildLogBase2(N10, DL); AddToWorklist(LogBase2.getNode()); EVT ADDVT = N1.getOperand(1).getValueType(); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); AddToWorklist(Trunc.getNode()); SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } // fold (udiv x, c) -> alternate AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (isConstantOrConstantVector(N1) && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildUDIV(N)) return Op; return SDValue(); } SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) { if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) && !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) { // Target-specific implementation of srem x, pow2. 
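// Illustrative sketch, not part of DAGCombiner:
// "udiv x, (shl c, y) -> x >>u (log2(c) + y)" when c is a power of two.
// Plain C++ with c == 8 (log2 == 3); the helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void udivByShiftedPow2(uint32_t x, unsigned y) {
  const uint32_t c = 8;
  assert(y < 29 && "keep the shift amount in range");
  assert(x / (c << y) == (x >> (3 + y)));
}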
if (SDValue Res = BuildSREMPow2(N)) return Res; } return SDValue(); } // handles ISD::SREM and ISD::UREM SDValue DAGCombiner::visitREM(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); bool isSigned = (Opcode == ISD::SREM); SDLoc DL(N); // fold (rem c1, c2) -> c1%c2 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; // fold (urem X, -1) -> select(FX == -1, 0, FX) // Freeze the numerator to avoid a miscompile with an undefined value. if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) { SDValue F0 = DAG.getFreeze(N0); SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ); return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0); } if (SDValue V = simplifyDivRem(N, DAG)) return V; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; if (isSigned) { // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } else { if (DAG.isKnownToBeAPowerOfTwo(N1)) { // fold (urem x, pow2) -> (and x, pow2-1) SDValue NegOne = DAG.getAllOnesConstant(DL, VT); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1)) // TODO: We should sink the following into isKnownToBePowerOfTwo // using a OrZero parameter analogous to our handling in ValueTracking. if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) && DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { SDValue NegOne = DAG.getAllOnesConstant(DL, VT); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the // speculative DIV must not cause a DIVREM conversion. We guard against this // by skipping the simplification if isIntDivCheap(). When div is not cheap, // combine will not return a DIVREM. Regardless, checking cheapness here // makes sense since the simplification results in fatter code. if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { if (isSigned) { // check if we can build faster implementation for srem if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N)) return OptimizedRem; } SDValue OptimizedDiv = isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) { // If the equivalent Div node also exists, update its users. unsigned DivOpcode = isSigned ? 
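// Illustrative sketch, not part of DAGCombiner: an unsigned remainder by a
// power of two is a bitwise AND with pow2 - 1, the result of
// "fold (urem x, pow2) -> (and x, pow2-1)" above. The helper name is
// hypothetical.
#include <cassert>
#include <cstdint>

static void uremByPow2IsAnd(uint32_t x) {
  const uint32_t Pow2 = 64;
  assert(x % Pow2 == (x & (Pow2 - 1)));
}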
ISD::SDIV : ISD::UDIV; if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(), { N0, N1 })) CombineTo(DivNode, OptimizedDiv); SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(OptimizedDiv.getNode()); AddToWorklist(Mul.getNode()); return Sub; } } // sdiv, srem -> sdivrem if (SDValue DivRem = useDivRem(N)) return DivRem.getValue(1); return SDValue(); } SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhs c1, c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS. if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (mulhs x, 0) -> 0 // do not return N1, because undef node may exist. if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return DAG.getConstant(0, DL, VT); } // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); if (TLI.isOperationLegal(ISD::MUL, NewVT)) { N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, DAG.getConstant(SimpleSize, DL, getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } return SDValue(); } SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhu c1, c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS. if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (mulhu x, 0) -> 0 // do not return N1, because undef node may exist. 
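// Illustrative sketch, not part of DAGCombiner: when the double-width type is
// legal, MULHS is a sign-extended multiply followed by a shift right by the
// original width and a truncate, as built above. 32-bit case shown; the
// helper name is hypothetical.
#include <cstdint>

static int32_t mulhsViaWidening(int32_t a, int32_t b) {
  int64_t Wide = (int64_t)a * (int64_t)b;           // sign_extend + mul
  return (int32_t)(uint32_t)((uint64_t)Wide >> 32); // srl by 32, truncate
}
// e.g. mulhsViaWidening(INT32_MIN, 2) == -1, the high half of -2^32.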
if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return DAG.getConstant(0, DL, VT); } // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; // fold (mulhu x, 1) -> 0 if (isOneConstant(N1)) return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c) if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) { unsigned NumEltBits = VT.getScalarSizeInBits(); SDValue LogBase2 = BuildLogBase2(N1, DL); SDValue SRLAmt = DAG.getNode( ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2); EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT); return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); } // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); if (TLI.isOperationLegal(ISD::MUL, NewVT)) { N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, DAG.getConstant(SimpleSize, DL, getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } // Simplify the operands using demanded-bits information. // We don't have demanded bits support for MULHU so this just enables constant // folding based on known bits. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } SDValue DAGCombiner::visitAVG(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (avg c1, c2) if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS. if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0); if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (avgfloor x, 0) -> x >> 1 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) { if (Opcode == ISD::AVGFLOORS) return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT)); if (Opcode == ISD::AVGFLOORU) return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT)); } } // fold (avg x, undef) -> x if (N0.isUndef()) return N1; if (N1.isUndef()) return N0; // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1 return SDValue(); } /// Perform optimizations common to nodes that compute two values. LoOp and HiOp /// give the opcodes for the two computations that are being performed. Return /// true if a simplification was made. SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp) { // If the high half is not needed, just compute the low half. bool HiExists = N->hasAnyUseOfValue(1); if (!HiExists && (!LegalOperations || TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); return CombineTo(N, Res, Res); } // If the low half is not needed, just compute the high half. 
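// Illustrative sketch, not part of DAGCombiner: the high half of an unsigned
// multiply by 2^c is just the top c bits of x, i.e.
// "fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)". The helper name is
// hypothetical.
#include <cassert>
#include <cstdint>

static void mulhuByPow2IsShift(uint32_t x) {
  const unsigned c = 12;
  uint32_t Hi = (uint32_t)(((uint64_t)x << c) >> 32); // reference MULHU value
  assert(Hi == (x >> (32 - c)));
}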
bool LoExists = N->hasAnyUseOfValue(0); if (!LoExists && (!LegalOperations || TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) { SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); return CombineTo(N, Res, Res); } // If both halves are used, return as it is. if (LoExists && HiExists) return SDValue(); // If the two computed results can be simplified separately, separate them. if (LoExists) { SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); AddToWorklist(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && (!LegalOperations || TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType()))) return CombineTo(N, LoOpt, LoOpt); } if (HiExists) { SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); AddToWorklist(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && (!LegalOperations || TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType()))) return CombineTo(N, HiOpt, HiOpt); } return SDValue(); } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) return Res; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0); // If the type is twice as wide is legal, transform the mulhu to a wider // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); if (TLI.isOperationLegal(ISD::MUL, NewVT)) { SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, DAG.getConstant(SimpleSize, DL, getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); return CombineTo(N, Lo, Hi); } } return SDValue(); } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) return Res; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0); // (umul_lohi N0, 0) -> (0, 0) if (isNullConstant(N1)) { SDValue Zero = DAG.getConstant(0, DL, VT); return CombineTo(N, Zero, Zero); } // (umul_lohi N0, 1) -> (N0, 0) if (isOneConstant(N1)) { SDValue Zero = DAG.getConstant(0, DL, VT); return CombineTo(N, N0, Zero); } // If the type is twice as wide is legal, transform the mulhu to a wider // multiply plus a shift. 
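// Illustrative sketch, not part of DAGCombiner: SMUL_LOHI through a legal
// double-width multiply, splitting the product into low and high halves as
// the widening transform above does. The helper name is hypothetical.
#include <cstdint>
#include <utility>

static std::pair<int32_t, int32_t> smulLoHiViaWidening(int32_t a, int32_t b) {
  int64_t Wide = (int64_t)a * (int64_t)b;                 // sign_extend + mul
  int32_t Lo = (int32_t)(uint32_t)(uint64_t)Wide;         // truncate
  int32_t Hi = (int32_t)(uint32_t)((uint64_t)Wide >> 32); // srl + truncate
  return {Lo, Hi};
}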
if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); if (TLI.isOperationLegal(ISD::MUL, NewVT)) { SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, DAG.getConstant(SimpleSize, DL, getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); return CombineTo(N, Lo, Hi); } } return SDValue(); } SDValue DAGCombiner::visitMULO(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); bool IsSigned = (ISD::SMULO == N->getOpcode()); EVT CarryVT = N->getValueType(1); SDLoc DL(N); ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold operation with constant operands. // TODO: Move this to FoldConstantArithmetic when it supports nodes with // multiple results. if (N0C && N1C) { bool Overflow; APInt Result = IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow) : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow); return CombineTo(N, DAG.getConstant(Result, DL, VT), DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT)); } // canonicalize constant to RHS. if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0); // fold (mulo x, 0) -> 0 + no carry out if (isNullOrNullSplat(N1)) return CombineTo(N, DAG.getConstant(0, DL, VT), DAG.getConstant(0, DL, CarryVT)); // (mulo x, 2) -> (addo x, x) // FIXME: This needs a freeze. if (N1C && N1C->getAPIntValue() == 2 && (!IsSigned || VT.getScalarSizeInBits() > 2)) return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, N->getVTList(), N0, N0); if (IsSigned) { // A 1 bit SMULO overflows if both inputs are 1. if (VT.getScalarSizeInBits() == 1) { SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1); return CombineTo(N, And, DAG.getSetCC(DL, CarryVT, And, DAG.getConstant(0, DL, VT), ISD::SETNE)); } // Multiplying n * m significant bits yields a result of n + m significant // bits. If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. unsigned SignBits = DAG.ComputeNumSignBits(N0); if (SignBits > 1) SignBits += DAG.ComputeNumSignBits(N1); if (SignBits > VT.getScalarSizeInBits() + 1) return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); } else { KnownBits N1Known = DAG.computeKnownBits(N1); KnownBits N0Known = DAG.computeKnownBits(N0); bool Overflow; (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow); if (!Overflow) return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); } return SDValue(); } // Function to calculate whether the Min/Max pair of SDNodes (potentially // swapped around) make a signed saturate pattern, clamping to between a signed // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW. // Returns the node being clamped and the bitwidth of the clamp in BW. Should // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the // same as SimplifySelectCC. 
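// Illustrative sketch, not part of DAGCombiner: the signed no-overflow test
// above -- multiplying values with n and m significant bits needs at most
// n + m bits, so if the operands' combined sign-bit count exceeds the result
// width plus one, SMULO cannot overflow. Assumes the GCC/Clang
// __builtin_clrsb builtin (sign-bit count == clrsb + 1); the helper name is
// hypothetical.
static bool smuloCannotOverflow(int a, int b) {
  unsigned SignBitsA = __builtin_clrsb(a) + 1;
  unsigned SignBitsB = __builtin_clrsb(b) + 1;
  return SignBitsA + SignBitsB > 8 * sizeof(int) + 1; // mirrors the check above
}
// e.g. smuloCannotOverflow(1000, -2000) is true: the product of two values
// that each fit in roughly half the width cannot overflow a 32-bit SMULO.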
N0getAPIntValue(); const APInt &C2 = N3C->getAPIntValue(); if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth())) return 0; return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0); }; // Check the initial value is a SMIN/SMAX equivalent. unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC); if (!Opcode0) return SDValue(); SDValue N00, N01, N02, N03; ISD::CondCode N0CC; switch (N0.getOpcode()) { case ISD::SMIN: case ISD::SMAX: N00 = N02 = N0.getOperand(0); N01 = N03 = N0.getOperand(1); N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT; break; case ISD::SELECT_CC: N00 = N0.getOperand(0); N01 = N0.getOperand(1); N02 = N0.getOperand(2); N03 = N0.getOperand(3); N0CC = cast(N0.getOperand(4))->get(); break; case ISD::SELECT: case ISD::VSELECT: if (N0.getOperand(0).getOpcode() != ISD::SETCC) return SDValue(); N00 = N0.getOperand(0).getOperand(0); N01 = N0.getOperand(0).getOperand(1); N02 = N0.getOperand(1); N03 = N0.getOperand(2); N0CC = cast(N0.getOperand(0).getOperand(2))->get(); break; default: return SDValue(); } unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC); if (!Opcode1 || Opcode0 == Opcode1) return SDValue(); ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01); ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1); if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0)) return SDValue(); const APInt &MinC = MinCOp->getAPIntValue(); const APInt &MaxC = MaxCOp->getAPIntValue(); APInt MinCPlus1 = MinC + 1; if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) { BW = MinCPlus1.exactLogBase2() + 1; Unsigned = false; return N02; } if (MaxC == 0 && MinCPlus1.isPowerOf2()) { BW = MinCPlus1.exactLogBase2(); Unsigned = true; return N02; } return SDValue(); } static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { unsigned BW; bool Unsigned; SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned); if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT) return SDValue(); EVT FPVT = Fp.getOperand(0).getValueType(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW); if (FPVT.isVector()) NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, FPVT.getVectorElementCount()); unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT; if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT)) return SDValue(); SDLoc DL(Fp); SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0), DAG.getValueType(NewVT.getScalarType())); return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0)) : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); } static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may // be truncated versions of the the setcc (N0/N1). 
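// Illustrative sketch, not part of DAGCombiner: the constant test above
// recognises a signed saturate by checking that the SMIN constant plus one is
// a power of two and equals the negated SMAX constant; the clamp to
// [-2^(BW-1), 2^(BW-1)-1] then maps to an FP_TO_SINT_SAT of width BW.
// The helper name is hypothetical.
#include <cassert>
#include <cstdint>

static unsigned signedSaturateWidth(int64_t MinC, int64_t MaxC) {
  int64_t MinCPlus1 = MinC + 1;
  bool IsPow2 = MinCPlus1 > 0 && (MinCPlus1 & (MinCPlus1 - 1)) == 0;
  assert(-MaxC == MinCPlus1 && IsPow2 && "not a signed saturate pattern");
  unsigned BW = 0;
  while ((int64_t(1) << BW) != MinCPlus1)
    ++BW;          // exactLogBase2
  return BW + 1;   // e.g. signedSaturateWidth(127, -128) == 8
}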
if ((N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) || N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT) return SDValue(); ConstantSDNode *N1C = isConstOrConstSplat(N1); ConstantSDNode *N3C = isConstOrConstSplat(N3); if (!N1C || !N3C) return SDValue(); const APInt &C1 = N1C->getAPIntValue(); const APInt &C3 = N3C->getAPIntValue(); if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() || C1 != C3.zext(C1.getBitWidth())) return SDValue(); unsigned BW = (C1 + 1).exactLogBase2(); EVT FPVT = N0.getOperand(0).getValueType(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW); if (FPVT.isVector()) NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, FPVT.getVectorElementCount()); if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT, FPVT, NewVT)) return SDValue(); SDValue Sat = DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0), DAG.getValueType(NewVT.getScalarType())); return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType()); } SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned Opcode = N->getOpcode(); SDLoc DL(N); // fold operation with constant operands. if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; // If the operands are the same, this is a no-op. if (N0 == N1) return N0; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(Opcode, DL, VT, N1, N0); // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. if (!TLI.isOperationLegal(Opcode, VT) && (N0.isUndef() || DAG.SignBitIsZero(N0)) && (N1.isUndef() || DAG.SignBitIsZero(N1))) { unsigned AltOpcode; switch (Opcode) { case ISD::SMIN: AltOpcode = ISD::UMIN; break; case ISD::SMAX: AltOpcode = ISD::UMAX; break; case ISD::UMIN: AltOpcode = ISD::SMIN; break; case ISD::UMAX: AltOpcode = ISD::SMAX; break; default: llvm_unreachable("Unknown MINMAX opcode"); } if (TLI.isOperationLegal(AltOpcode, VT)) return DAG.getNode(AltOpcode, DL, VT, N0, N1); } if (Opcode == ISD::SMIN || Opcode == ISD::SMAX) if (SDValue S = PerformMinMaxFpToSatCombine( N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG)) return S; if (Opcode == ISD::UMIN) if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG)) return S; // Simplify the operands using demanded-bits information. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } /// If this is a bitwise logic instruction and both operands have the same /// opcode, try to sink the other opcode after the logic instruction. SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned LogicOpcode = N->getOpcode(); unsigned HandOpcode = N0.getOpcode(); assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || LogicOpcode == ISD::XOR) && "Expected logic opcode"); assert(HandOpcode == N1.getOpcode() && "Bad input!"); // Bail early if none of these transforms apply. if (N0.getNumOperands() == 0) return SDValue(); // FIXME: We should check number of uses of the operands to not increase // the instruction count for all transforms. // Handle size-changing casts. 
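// Illustrative sketch, not part of DAGCombiner: when the sign bits of both
// operands are known zero, signed and unsigned min/max agree, which justifies
// flipping SMIN/SMAX to UMIN/UMAX (or back) when only the other form is
// legal. The helper name is hypothetical.
#include <algorithm>
#include <cassert>
#include <cstdint>

static void minMaxSignednessFlip(int32_t a, int32_t b) {
  assert(a >= 0 && b >= 0 && "sign bits must be known zero");
  assert(std::min(a, b) == (int32_t)std::min((uint32_t)a, (uint32_t)b));
  assert(std::max(a, b) == (int32_t)std::max((uint32_t)a, (uint32_t)b));
}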
SDValue X = N0.getOperand(0); SDValue Y = N1.getOperand(0); EVT XVT = X.getValueType(); SDLoc DL(N); if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND || HandOpcode == ISD::SIGN_EXTEND) { // If both operands have other uses, this transform would create extra // instructions without eliminating anything. if (!N0.hasOneUse() && !N1.hasOneUse()) return SDValue(); // We need matching integer source types. if (XVT != Y.getValueType()) return SDValue(); // Don't create an illegal op during or after legalization. Don't ever // create an unsupported vector op. if ((VT.isVector() || LegalOperations) && !TLI.isOperationLegalOrCustom(LogicOpcode, XVT)) return SDValue(); // Avoid infinite looping with PromoteIntBinOp. // TODO: Should we apply desirable/legal constraints to all opcodes? if (HandOpcode == ISD::ANY_EXTEND && LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT)) return SDValue(); // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y) SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic); } // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y) if (HandOpcode == ISD::TRUNCATE) { // If both operands have other uses, this transform would create extra // instructions without eliminating anything. if (!N0.hasOneUse() && !N1.hasOneUse()) return SDValue(); // We need matching source types. if (XVT != Y.getValueType()) return SDValue(); // Don't create an illegal op during or after legalization. if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT)) return SDValue(); // Be extra careful sinking truncate. If it's free, there's no benefit in // widening a binop. Also, don't create a logic op on an illegal type. if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT)) return SDValue(); if (!TLI.isTypeLegal(XVT)) return SDValue(); SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic); } // For binops SHL/SRL/SRA/AND: // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL || HandOpcode == ISD::SRA || HandOpcode == ISD::AND) && N0.getOperand(1) == N1.getOperand(1)) { // If either operand has other uses, this transform is not an improvement. if (!N0.hasOneUse() || !N1.hasOneUse()) return SDValue(); SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1)); } // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y) if (HandOpcode == ISD::BSWAP) { // If either operand has other uses, this transform is not an improvement. if (!N0.hasOneUse() || !N1.hasOneUse()) return SDValue(); SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic); } // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) // Only perform this optimization up until type legalization, before // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and // we don't want to undo this promotion. // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) && Level <= AfterLegalizeTypes) { // Input types must be integer and the same. 
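// Illustrative sketch, not part of DAGCombiner: bitwise logic commutes with a
// shared hand opcode, e.g. (x << s) & (y << s) == (x & y) << s and
// zext(x) | zext(y) == zext(x | y), which is what the hoisting above relies
// on. The helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void logicCommutesWithHands(uint16_t x, uint16_t y, unsigned s) {
  assert(s < 16);
  assert((((uint32_t)x << s) & ((uint32_t)y << s)) == ((uint32_t)(x & y) << s));
  assert(((uint32_t)x | (uint32_t)y) == (uint32_t)(uint16_t)(x | y)); // zext hands
}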
if (XVT.isInteger() && XVT == Y.getValueType() && !(VT.isVector() && TLI.isTypeLegal(VT) && !XVT.isVector() && !TLI.isTypeLegal(XVT))) { SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic); } } // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) // If both shuffles use the same mask, and both shuffle within a single // vector, then it is worthwhile to move the swizzle after the operation. // The type-legalizer generates this pattern when loading illegal // vector types from memory. In many cases this allows additional shuffle // optimizations. // There are other cases where moving the shuffle after the xor/and/or // is profitable even if shuffles don't perform a swizzle. // If both shuffles use the same mask, and both shuffles have the same first // or second operand, then it might still be profitable to move the shuffle // after the xor/and/or operation. if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { auto *SVN0 = cast(N0); auto *SVN1 = cast(N1); assert(X.getValueType() == Y.getValueType() && "Inputs to shuffles are not the same type"); // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. // Check also that shuffles have only one use to avoid introducing extra // instructions. if (!SVN0->hasOneUse() || !SVN1->hasOneUse() || !SVN0->getMask().equals(SVN1->getMask())) return SDValue(); // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. SDValue ShOp = N0.getOperand(1); if (LogicOpcode == ISD::XOR && !ShOp.isUndef()) ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C) if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(0), N1.getOperand(0)); return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask()); } // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. ShOp = N0.getOperand(0); if (LogicOpcode == ISD::XOR && !ShOp.isUndef()) ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B)) if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) { SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1), N1.getOperand(1)); return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask()); } } return SDValue(); } /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient. SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL) { SDValue LL, LR, RL, RR, N0CC, N1CC; if (!isSetCCEquivalent(N0, LL, LR, N0CC) || !isSetCCEquivalent(N1, RL, RR, N1CC)) return SDValue(); assert(N0.getValueType() == N1.getValueType() && "Unexpected operand types for bitwise logic op"); assert(LL.getValueType() == LR.getValueType() && RL.getValueType() == RR.getValueType() && "Unexpected operand types for setcc"); // If we're here post-legalization or the logic op type is not i1, the logic // op type must match a setcc result type. Also, all folds require new // operations on the left and right operands, so those types must match. 
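// Illustrative sketch, not part of DAGCombiner: lane-wise logic commutes with
// a swizzle applied identically to both inputs,
// and(shuffle(A, m), shuffle(B, m)) == shuffle(and(A, B), m), the core of the
// shuffle reordering above. Scalar 4-lane model; the helper name is
// hypothetical.
#include <cassert>
#include <cstdint>

static void logicCommutesWithSwizzle() {
  uint32_t A[4] = {1, 2, 3, 4}, B[4] = {0xF0, 0x0F, 0xFF, 0};
  int Mask[4] = {3, 1, 0, 2}; // single-input shuffle mask
  uint32_t AndAB[4];
  for (int i = 0; i != 4; ++i)
    AndAB[i] = A[i] & B[i];   // logic op on the un-shuffled inputs
  for (int i = 0; i != 4; ++i)
    assert((A[Mask[i]] & B[Mask[i]]) == AndAB[Mask[i]]);
}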
EVT VT = N0.getValueType(); EVT OpVT = LL.getValueType(); if (LegalOperations || VT.getScalarType() != MVT::i1) if (VT != getSetCCResultType(OpVT)) return SDValue(); if (OpVT != RL.getValueType()) return SDValue(); ISD::CondCode CC0 = cast(N0CC)->get(); ISD::CondCode CC1 = cast(N1CC)->get(); bool IsInteger = OpVT.isInteger(); if (LR == RR && CC0 == CC1 && IsInteger) { bool IsZero = isNullOrNullSplat(LR); bool IsNeg1 = isAllOnesOrAllOnesSplat(LR); // All bits clear? bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero; // All sign bits clear? bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1; // Any bits set? bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero; // Any sign bits set? bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero; // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0) // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1) // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0) // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0) if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) { SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL); AddToWorklist(Or.getNode()); return DAG.getSetCC(DL, VT, Or, LR, CC1); } // All bits set? bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1; // All sign bits set? bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero; // Any bits clear? bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1; // Any sign bits clear? bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1; // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1) // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0) // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1) // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1) if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) { SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL); AddToWorklist(And.getNode()); return DAG.getSetCC(DL, VT, And, LR, CC1); } } // TODO: What is the 'or' equivalent of this fold? // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2) if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 && IsInteger && CC0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || (isAllOnesConstant(LR) && isNullConstant(RR)))) { SDValue One = DAG.getConstant(1, DL, OpVT); SDValue Two = DAG.getConstant(2, DL, OpVT); SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One); AddToWorklist(Add.getNode()); return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE); } // Try more general transforms if the predicates match and the only user of // the compares is the 'and' or 'or'. if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 && N0.hasOneUse() && N1.hasOneUse()) { // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) { SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR); SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR); SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR); SDValue Zero = DAG.getConstant(0, DL, OpVT); return DAG.getSetCC(DL, VT, Or, Zero, CC1); } // Turn compare of constants whose difference is 1 bit into add+and+setcc. if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) { // Match a shared variable operand and 2 non-opaque constant operands. auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) { // The difference of the constants must be a single bit. 
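// Illustrative sketch, not part of DAGCombiner: two zero-compares joined by a
// logic op collapse into one compare of the OR of the operands, i.e.
// (x == 0 && y == 0) == ((x | y) == 0) and (x != 0 || y != 0) == ((x | y) != 0),
// matching the AndEqZero / OrNeZero cases above. The helper name is
// hypothetical.
#include <cassert>
#include <cstdint>

static void foldTwoZeroCompares(uint32_t x, uint32_t y) {
  assert(((x == 0) && (y == 0)) == ((x | y) == 0)); // and(seteq, seteq)
  assert(((x != 0) || (y != 0)) == ((x | y) != 0)); // or(setne, setne)
}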
const APInt &CMax = APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue()); const APInt &CMin = APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue()); return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2(); }; if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) { // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) --> // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR); SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR); SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min); SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min); SDValue Mask = DAG.getNOT(DL, Diff, OpVT); SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask); SDValue Zero = DAG.getConstant(0, DL, OpVT); return DAG.getSetCC(DL, VT, And, Zero, CC0); } } } // Canonicalize equivalent operands to LL == RL. if (LL == RR && LR == RL) { CC1 = ISD::getSetCCSwappedOperands(CC1); std::swap(RL, RR); } // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) if (LL == RL && LR == RR) { ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT) : ISD::getSetCCOrOperation(CC0, CC1, OpVT); if (NewCC != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, OpVT)))) return DAG.getSetCC(DL, VT, LL, LR, NewCC); } return SDValue(); } /// This contains all DAGCombine rules which reduce two values combined by /// an And operation to a single value. This makes them reusable in the context /// of visitSELECT(). Rules involving constants are not included as /// visitSELECT() already handles those cases. SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { EVT VT = N1.getValueType(); SDLoc DL(N); // fold (and x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL)) return V; // TODO: Rewrite this to return a new 'AND' instead of using CombineTo. if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && VT.getSizeInBits() <= 64 && N0->hasOneUse()) { if (ConstantSDNode *ADDI = dyn_cast(N0.getOperand(1))) { if (ConstantSDNode *SRLI = dyn_cast(N1.getOperand(1))) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal // immediate for an add, but it is legal if its top c2 bits are set, // transform the ADD so the immediate doesn't need to be materialized // in a register. APInt ADDC = ADDI->getAPIntValue(); APInt SRLC = SRLI->getAPIntValue(); if (ADDC.getMinSignedBits() <= 64 && SRLC.ult(VT.getSizeInBits()) && !TLI.isLegalAddImmediate(ADDC.getSExtValue())) { APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), SRLC.getZExtValue()); if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { ADDC |= Mask; if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { SDLoc DL0(N0); SDValue NewAdd = DAG.getNode(ISD::ADD, DL0, VT, N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); CombineTo(N0.getNode(), NewAdd); // Return N so it doesn't get rechecked! return SDValue(N, 0); } } } } } } // Reduce bit extract of low half of an integer to the narrower type. 
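// Illustrative sketch, not part of DAGCombiner: when the two compared
// constants differ by a single bit, "x != CMax && x != CMin" becomes one
// compare of ((x - CMin) & ~(CMax - CMin)) against zero, the sequence built
// above. Constants chosen for the example; the helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void compareConstantsOneBitApart(uint32_t x) {
  const uint32_t CMin = 8, CMax = 12; // differ by 4, a power of two
  uint32_t Masked = (x - CMin) & ~(CMax - CMin);
  assert(((x != CMax) && (x != CMin)) == (Masked != 0));
}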
// (and (srl i64:x, K), KMask) -> // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *CAnd = dyn_cast(N1)) { if (ConstantSDNode *CShift = dyn_cast(N0.getOperand(1))) { unsigned Size = VT.getSizeInBits(); const APInt &AndMask = CAnd->getAPIntValue(); unsigned ShiftBits = CShift->getZExtValue(); // Bail out, this node will probably disappear anyway. if (ShiftBits == 0) return SDValue(); unsigned MaskBits = AndMask.countTrailingOnes(); EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); if (AndMask.isMask() && // Required bits must not span the two halves of the integer and // must fit in the half size type. (ShiftBits + MaskBits <= Size / 2) && TLI.isNarrowingProfitable(VT, HalfVT) && TLI.isTypeDesirableForOp(ISD::AND, HalfVT) && TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) && TLI.isTruncateFree(VT, HalfVT) && TLI.isZExtFree(HalfVT, VT)) { // The isNarrowingProfitable is to avoid regressions on PPC and // AArch64 which match a few 64-bit bit insert / bit extract patterns // on downstream users of this. Those patterns could probably be // extended to handle extensions mixed in. SDValue SL(N0); assert(MaskBits <= Size); // Extracting the highest bit of the low half. EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT, N0.getOperand(0)); SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT); SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT); SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK); SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask); return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And); } } } } return SDValue(); } bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, EVT LoadResultTy, EVT &ExtVT) { if (!AndC->getAPIntValue().isMask()) return false; unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); EVT LoadedVT = LoadN->getMemoryVT(); if (ExtVT == LoadedVT && (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { // ZEXTLOAD will match without needing to change the size of the value being // loaded. return true; } // Do not change the width of a volatile or atomic loads. if (!LoadN->isSimple()) return false; // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) return false; if (LegalOperations && !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) return false; if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) return false; return true; } bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, ISD::LoadExtType ExtType, EVT &MemVT, unsigned ShAmt) { if (!LDST) return false; // Only allow byte offsets. if (ShAmt % 8) return false; // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!MemVT.isRound()) return false; // Don't change the width of a volatile or atomic loads. if (!LDST->isSimple()) return false; EVT LdStMemVT = LDST->getMemoryVT(); // Bail out when changing the scalable property, since we can't be sure that // we're actually narrowing here. if (LdStMemVT.isScalableVector() != MemVT.isScalableVector()) return false; // Verify that we are actually reducing a load width here. 
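// Illustrative sketch, not part of DAGCombiner: a bit-field that lies entirely
// in the low half of an i64 can be extracted in i32 and zero-extended, the
// narrowing performed above. Shift and mask values chosen for the example;
// the helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void narrowLowHalfBitExtract(uint64_t x) {
  const unsigned K = 5;        // shift amount
  const uint64_t KMask = 0xFF; // 8 mask bits; 5 + 8 <= 32 keeps it in the low half
  uint64_t Wide = (x >> K) & KMask;
  uint64_t Narrow = (uint64_t)(((uint32_t)x >> K) & (uint32_t)KMask);
  assert(Wide == Narrow);
}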
if (LdStMemVT.bitsLT(MemVT)) return false; // Ensure that this isn't going to produce an unsupported memory access. if (ShAmt) { assert(ShAmt % 8 == 0 && "ShAmt is byte offset"); const unsigned ByteShAmt = ShAmt / 8; const Align LDSTAlign = LDST->getAlign(); const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, LDST->getAddressSpace(), NarrowAlign, LDST->getMemOperand()->getFlags())) return false; } // It's not possible to generate a constant of extended or untyped type. EVT PtrType = LDST->getBasePtr().getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) return false; if (isa(LDST)) { LoadSDNode *Load = cast(LDST); // Don't transform one with multiple uses, this would require adding a new // load. if (!SDValue(Load, 0).hasOneUse()) return false; if (LegalOperations && !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT)) return false; // For the transform to be legal, the load must produce only two values // (the value loaded and the chain). Don't transform a pre-increment // load, for example, which produces an extra value. Otherwise the // transformation is not equivalent, and the downstream logic to replace // uses gets things wrong. if (Load->getNumValues() > 2) return false; // If the load that we're shrinking is an extload and we're not just // discarding the extension we can't simply shrink the load. Bail. // TODO: It would be possible to merge the extensions in some cases. if (Load->getExtensionType() != ISD::NON_EXTLOAD && Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt) return false; if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT)) return false; } else { assert(isa(LDST) && "It is not a Load nor a Store SDNode"); StoreSDNode *Store = cast(LDST); // Can't write outside the original store if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt) return false; if (LegalOperations && !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT)) return false; } return true; } bool DAGCombiner::SearchForAndLoads(SDNode *N, SmallVectorImpl &Loads, SmallPtrSetImpl &NodesWithConsts, ConstantSDNode *Mask, SDNode *&NodeToMask) { // Recursively search for the operands, looking for loads which can be // narrowed. for (SDValue Op : N->op_values()) { if (Op.getValueType().isVector()) return false; // Some constants may need fixing up later if they are too large. if (auto *C = dyn_cast(Op)) { if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) && (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()) NodesWithConsts.insert(N); continue; } if (!Op.hasOneUse()) return false; switch(Op.getOpcode()) { case ISD::LOAD: { auto *Load = cast(Op); EVT ExtVT; if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) && isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) { // ZEXTLOAD is already small enough. if (Load->getExtensionType() == ISD::ZEXTLOAD && ExtVT.bitsGE(Load->getMemoryVT())) continue; // Use LE to convert equal sized loads to zext. if (ExtVT.bitsLE(Load->getMemoryVT())) Loads.push_back(Load); continue; } return false; } case ISD::ZERO_EXTEND: case ISD::AssertZext: { unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); EVT VT = Op.getOpcode() == ISD::AssertZext ? cast(Op.getOperand(1))->getVT() : Op.getOperand(0).getValueType(); // We can accept extending nodes if the mask is wider or an equal // width to the original type. 
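// Illustrative note (standalone sample, not from the original source): pushing
// the AND mask backwards through OR/XOR/AND towards the loads is justified by
// the scalar identity (a op b) & m == ((a & m) op (b & m)) for op in
// {and, or, xor}. Arbitrary sample constants:
static_assert(((0x1234u ^ 0xABCDu) & 0xFFu) ==
                  ((0x1234u & 0xFFu) ^ (0xABCDu & 0xFFu)),
              "a low mask distributes over xor");
static_assert(((0x1234u | 0xABCDu) & 0xFFu) ==
                  ((0x1234u & 0xFFu) | (0xABCDu & 0xFFu)),
              "a low mask distributes over or");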
if (ExtVT.bitsGE(VT)) continue; break; } case ISD::OR: case ISD::XOR: case ISD::AND: if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask, NodeToMask)) return false; continue; } // Allow one node which will masked along with any loads found. if (NodeToMask) return false; // Also ensure that the node to be masked only produces one data result. NodeToMask = Op.getNode(); if (NodeToMask->getNumValues() > 1) { bool HasValue = false; for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) { MVT VT = SDValue(NodeToMask, i).getSimpleValueType(); if (VT != MVT::Glue && VT != MVT::Other) { if (HasValue) { NodeToMask = nullptr; return false; } HasValue = true; } } assert(HasValue && "Node to be masked has no data result?"); } } return true; } bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { auto *Mask = dyn_cast(N->getOperand(1)); if (!Mask) return false; if (!Mask->getAPIntValue().isMask()) return false; // No need to do anything if the and directly uses a load. if (isa(N->getOperand(0))) return false; SmallVector Loads; SmallPtrSet NodesWithConsts; SDNode *FixupNode = nullptr; if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) { if (Loads.size() == 0) return false; LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump()); SDValue MaskOp = N->getOperand(1); // If it exists, fixup the single node we allow in the tree that needs // masking. if (FixupNode) { LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0), SDValue(FixupNode, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); if (And.getOpcode() == ISD ::AND) DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp); } // Narrow any constants that need it. for (auto *LogicN : NodesWithConsts) { SDValue Op0 = LogicN->getOperand(0); SDValue Op1 = LogicN->getOperand(1); if (isa(Op0)) std::swap(Op0, Op1); SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp); DAG.UpdateNodeOperands(LogicN, Op0, And); } // Create narrow loads. for (auto *Load : Loads) { LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), SDValue(Load, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); if (And.getOpcode() == ISD ::AND) And = SDValue( DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0); SDValue NewLoad = reduceLoadWidth(And.getNode()); assert(NewLoad && "Shouldn't be masking the load if it can't be narrowed"); CombineTo(Load, NewLoad, NewLoad.getValue(1)); } DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode()); return true; } return false; } // Unfold // x & (-1 'logical shift' y) // To // (x 'opposite logical shift' y) 'logical shift' y // if it is better for performance. SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { assert(N->getOpcode() == ISD::AND); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Do we actually prefer shifts over mask? if (!TLI.shouldFoldMaskToVariableShiftPair(N0)) return SDValue(); // Try to match (-1 '[outer] logical shift' y) unsigned OuterShift; unsigned InnerShift; // The opposite direction to the OuterShift. SDValue Y; // Shift amount. 
auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool { if (!M.hasOneUse()) return false; OuterShift = M->getOpcode(); if (OuterShift == ISD::SHL) InnerShift = ISD::SRL; else if (OuterShift == ISD::SRL) InnerShift = ISD::SHL; else return false; if (!isAllOnesConstant(M->getOperand(0))) return false; Y = M->getOperand(1); return true; }; SDValue X; if (matchMask(N1)) X = N0; else if (matchMask(N0)) X = N1; else return SDValue(); SDLoc DL(N); EVT VT = N->getValueType(0); // tmp = x 'opposite logical shift' y SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y); // ret = tmp 'logical shift' y SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y); return T1; } /// Try to replace shift/logic that tests if a bit is clear with mask + setcc. /// For a target with a bit test, this is expected to become test + set and save /// at least 1 instruction. static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { assert(And->getOpcode() == ISD::AND && "Expected an 'and' op"); // This is probably not worthwhile without a supported type. EVT VT = And->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isTypeLegal(VT)) return SDValue(); // Look through an optional extension. SDValue And0 = And->getOperand(0), And1 = And->getOperand(1); if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse()) And0 = And0.getOperand(0); if (!isOneConstant(And1) || !And0.hasOneUse()) return SDValue(); SDValue Src = And0; // Attempt to find a 'not' op. // TODO: Should we favor test+set even without the 'not' op? bool FoundNot = false; if (isBitwiseNot(Src)) { FoundNot = true; Src = Src.getOperand(0); // Look though an optional truncation. The source operand may not be the // same type as the original 'and', but that is ok because we are masking // off everything but the low bit. if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse()) Src = Src.getOperand(0); } // Match a shift-right by constant. if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse()) return SDValue(); // We might have looked through casts that make this transform invalid. // TODO: If the source type is wider than the result type, do the mask and // compare in the source type. unsigned VTBitWidth = VT.getScalarSizeInBits(); SDValue ShiftAmt = Src.getOperand(1); auto *ShiftAmtC = dyn_cast(ShiftAmt); if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth)) return SDValue(); // Set source to shift source. Src = Src.getOperand(0); // Try again to find a 'not' op. // TODO: Should we favor test+set even with two 'not' ops? if (!FoundNot) { if (!isBitwiseNot(Src)) return SDValue(); Src = Src.getOperand(0); } if (!TLI.hasBitTest(Src, ShiftAmt)) return SDValue(); // Turn this into a bit-test pattern using mask op + setcc: // and (not (srl X, C)), 1 --> (and X, 1< (and X, 1<getZExtValue()), DL, VT); SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ); return DAG.getZExtOrTrunc(Setcc, DL, VT); } /// For targets that support usubsat, match a bit-hack form of that operation /// that ends in 'and' and convert it. static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); // Canonicalize SRA as operand 1. if (N0.getOpcode() == ISD::SRA) std::swap(N0, N1); // xor/add with SMIN (signmask) are logically equivalent. 
if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD) return SDValue(); if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() || N0.getOperand(0) != N1.getOperand(0)) return SDValue(); unsigned BitWidth = VT.getScalarSizeInBits(); ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true); ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true); if (!XorC || !XorC->getAPIntValue().isSignMask() || !SraC || SraC->getAPIntValue() != BitWidth - 1) return SDValue(); // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128 SDLoc DL(N); SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT); return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); } /// Given a bitwise logic operation N with a matching bitwise logic operand, /// fold a pattern where 2 of the source operands are identically shifted /// values. For example: /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG) { unsigned LogicOpcode = N->getOpcode(); assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || LogicOpcode == ISD::XOR) && "Expected bitwise logic operation"); if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse()) return SDValue(); // Match another bitwise logic op and a shift. unsigned ShiftOpcode = ShiftOp.getOpcode(); if (LogicOp.getOpcode() != LogicOpcode || !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL || ShiftOpcode == ISD::SRA)) return SDValue(); // Match another shift op inside the first logic operand. Handle both commuted // possibilities. // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z SDValue X1 = ShiftOp.getOperand(0); SDValue Y = ShiftOp.getOperand(1); SDValue X0, Z; if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode && LogicOp.getOperand(0).getOperand(1) == Y) { X0 = LogicOp.getOperand(0).getOperand(0); Z = LogicOp.getOperand(1); } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode && LogicOp.getOperand(1).getOperand(1) == Y) { X0 = LogicOp.getOperand(1).getOperand(0); Z = LogicOp.getOperand(0); } else { return SDValue(); } EVT VT = N->getValueType(0); SDLoc DL(N); SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1); SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y); return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); } SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); // x & x --> x if (N0 == N1) return N0; // fold (and c1, c2) -> c1&c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) return N0; // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load auto *MLoad = dyn_cast(N0); ConstantSDNode *Splat 
= isConstOrConstSplat(N1, true, true); if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() && Splat && N1.hasOneUse()) { EVT LoadVT = MLoad->getMemoryVT(); EVT ExtVT = VT; if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) { // For this AND to be a zero extension of the masked load the elements // of the BuildVec must mask the bottom bits of the extended element // type uint64_t ElementSize = LoadVT.getVectorElementType().getScalarSizeInBits(); if (Splat->getAPIntValue().isMask(ElementSize)) { return DAG.getMaskedLoad( ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), ISD::ZEXTLOAD, MLoad->isExpandingLoad()); } } } } // fold (and x, -1) -> x if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // reassociate and if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) return RAND; // Try to convert a constant mask AND into a shuffle clear mask. if (VT.isVector()) if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) return Combined; // fold (and (or x, C), D) -> D if (C & D) == D auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); }; if (N0.getOpcode() == ISD::OR && ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset)) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); // Replace uses of the AND with uses of the Zero extend node. CombineTo(N, Zext); // We actually want to replace all uses of the any_extend with the // zero_extend, to avoid duplicating things. This will later cause this // AND to be folded. CombineTo(N0.getNode(), Zext); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. // // the 'X' node here can either be nothing or an extract_vector_elt to catch // more cases. if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() && N0.getOperand(0).getOpcode() == ISD::LOAD && N0.getOperand(0).getResNo() == 0) || (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) { LoadSDNode *Load = cast( (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0) ); // Get the constant (if applicable) the zero'th operand is being ANDed with. // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. 
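// Standalone illustration of the (and (or x, C), D) -> D subset fold above
// (arbitrary sample constants, not part of the original code): once C covers
// every bit of D, OR-ing with C forces those bits to one, so the AND with D
// always yields D.
static_assert((0x3030u & 0xF0F0u) == 0x3030u, "D is a subset of C");
static_assert(((0x1234u | 0xF0F0u) & 0x3030u) == 0x3030u,
              "(x | C) & D == D for any x when (C & D) == D");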
APInt Constant = APInt::getZero(1); if (const ConstantSDNode *C = isConstOrConstSplat( N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast(N1)) { APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs); if (IsSplat) { // Undef bits can contribute to a possible optimisation if set, so // set them. SplatValue |= SplatUndef; // The splat value may be something like "0x00FFFFFF", which means 0 for // the first vector value and FF for the rest, repeating. We need a mask // that will apply equally to all members of the vector, so AND all the // lanes of the constant together. unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits(); // If the splat value has been compressed to a bitlength lower // than the size of the vector lane, we need to re-expand it to // the lane size. if (EltBitWidth > SplatBitSize) for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth); SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2) SplatValue |= SplatValue.shl(SplatBitSize); // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. if ((SplatBitSize % EltBitWidth) == 0) { Constant = APInt::getAllOnes(EltBitWidth); for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); } } } // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is // actually legal and isn't going to get expanded, else this is a false // optimisation. bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, Load->getValueType(0), Load->getMemoryVT()); // Resize the constant to the same size as the original memory access before // extension. If it is still the AllOnesValue then this AND is completely // unneeded. Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits()); bool B; switch (Load->getExtensionType()) { default: B = false; break; case ISD::EXTLOAD: B = CanZextLoadProfitably; break; case ISD::ZEXTLOAD: case ISD::NON_EXTLOAD: B = true; break; } if (B && Constant.isAllOnes()) { // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to // preserve semantics once we get rid of the AND. SDValue NewLoad(Load, 0); // Fold the AND away. NewLoad may get replaced immediately. CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); if (Load->getExtensionType() == ISD::EXTLOAD) { NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, Load->getValueType(0), SDLoc(Load), Load->getChain(), Load->getBasePtr(), Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand()); // Replace uses of the EXTLOAD with the new ZEXTLOAD. if (Load->getNumValues() == 3) { // PRE/POST_INC loads have 3 values. SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), NewLoad.getValue(2) }; CombineTo(Load, To, 3, true); } else { CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); } } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C && ISD::isExtOpcode(N0.getOperand(0).getOpcode())) { SDValue Ext = N0.getOperand(0); EVT ExtVT = Ext->getValueType(0); SDValue Extendee = Ext->getOperand(0); unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits(); if (N1C->getAPIntValue().isMask(ScalarWidth) && (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) { // (and (extract_subvector (zext|anyext|sext v) _) iN_mask) // => (extract_subvector (iN_zeroext v)) SDValue ZeroExtExtendee = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee, N0.getOperand(1)); } } // fold (and (masked_gather x)) -> (zext_masked_gather x) if (auto *GN0 = dyn_cast(N0)) { EVT MemVT = GN0->getMemoryVT(); EVT ScalarVT = MemVT.getScalarType(); if (SDValue(GN0, 0).hasOneUse() && isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) && TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) { SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(), GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; SDValue ZExtLoad = DAG.getMaskedGather( DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops, GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD); CombineTo(N, ZExtLoad); AddToWorklist(ZExtLoad.getNode()); // Avoid recheck of N. return SDValue(N, 0); } } // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector()) if (SDValue Res = reduceLoadWidth(N)) return Res; if (LegalTypes) { // Attempt to propagate the AND back up to the leaves which, if they're // loads, can be combined to narrow loads and the AND node can be removed. // Perform after legalization so that extend nodes will already be // combined into the loads. if (BackwardsPropagateMask(N)) return SDValue(N, 0); } if (SDValue Combined = visitANDLike(N0, N1, N)) return Combined; // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) return R; // Masking the negated extension of a boolean is just the zero-extended // boolean: // and (sub 0, zext(bool X)), 1 --> zext(bool X) // and (sub 0, sext(bool X)), 1 --> zext(bool X) // // Note: the SimplifyDemandedBits fold below can make an information-losing // transform, and then we have no way to find this better fold. if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { if (isNullOrNullSplat(N0.getOperand(0))) { SDValue SubRHS = N0.getOperand(1); if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) return SubRHS; if (SubRHS.getOpcode() == ISD::SIGN_EXTEND && SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0)); } } // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. 
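// Standalone illustration of the negated-boolean fold above (not part of the
// original code): with a boolean b, zext(b) is 0 or 1 and sext(b) is 0 or -1;
// negating either and masking with 1 gives back zext(b). Sample 32-bit values:
static_assert(((0u - 1u) & 1u) == 1u, "-(zext true) & 1 == 1");
static_assert(((0u - 0u) & 1u) == 0u, "-(zext false) & 1 == 0");
static_assert(((0u - 0xFFFFFFFFu) & 1u) == 1u, "-(sext true) & 1 == 1");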
if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (zext_inreg (extload x)) -> (zextload x) // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use if (ISD::isUNINDEXEDLoad(N0.getNode()) && (ISD::isEXTLoad(N0.getNode()) || (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned ExtBitSize = N1.getScalarValueSizeInBits(); unsigned MemBitSize = MemVT.getScalarSizeInBits(); APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize); if (DAG.MaskedValueIsZero(N1, ExtBits) && ((!LegalOperations && LN0->isSimple()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false)) return BSwap; } if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) return Shifts; if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) return V; // Recognize the following pattern: // // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask) // // where bitmask is a mask that clears the upper bits of AndVT. The // number of bits in bitmask must be a power of two. auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) { if (LHS->getOpcode() != ISD::SIGN_EXTEND) return false; auto *C = dyn_cast(RHS); if (!C) return false; if (!C->getAPIntValue().isMask( LHS.getOperand(0).getValueType().getFixedSizeInBits())) return false; return true; }; // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...). if (IsAndZeroExtMask(N0, N1)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); if (hasOperation(ISD::USUBSAT, VT)) if (SDValue V = foldAndToUsubsat(N, DAG)) return V; return SDValue(); } /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits) { if (!LegalOperations) return SDValue(); EVT VT = N->getValueType(0); if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) return SDValue(); if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff) bool LookPassAnd0 = false; bool LookPassAnd1 = false; if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) std::swap(N0, N1); if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() == ISD::AND) { if (!N0->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); // Also handle 0xffff since the LHS is guaranteed to have zeros there. // This is needed for X86. 
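// Standalone illustration of the pattern being matched here (sample value
// 0xAABBCCDD, not part of the original code): swapping the two low bytes via
// masked shifts gives the high half of a full 32-bit byte swap, using
// bswap(0xAABBCCDD) == 0xDDCCBBAA:
static_assert((((0xAABBCCDDu & 0xFF00u) >> 8) | ((0xAABBCCDDu & 0xFFu) << 8)) ==
                  (0xDDCCBBAAu >> 16),
              "(a >> 8) | (a << 8) restricted to 16 bits == bswap(a) >> 16");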
if (!N01C || (N01C->getZExtValue() != 0xFF00 && N01C->getZExtValue() != 0xFFFF)) return SDValue(); N0 = N0.getOperand(0); LookPassAnd0 = true; } if (N1.getOpcode() == ISD::AND) { if (!N1->hasOneUse()) return SDValue(); ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N11C || N11C->getZExtValue() != 0xFF) return SDValue(); N1 = N1.getOperand(0); LookPassAnd1 = true; } if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); if (!N0->hasOneUse() || !N1->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N01C || !N11C) return SDValue(); if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) return SDValue(); // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) SDValue N00 = N0->getOperand(0); if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { if (!N00->hasOneUse()) return SDValue(); ConstantSDNode *N001C = dyn_cast(N00.getOperand(1)); if (!N001C || N001C->getZExtValue() != 0xFF) return SDValue(); N00 = N00.getOperand(0); LookPassAnd0 = true; } SDValue N10 = N1->getOperand(0); if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { if (!N10->hasOneUse()) return SDValue(); ConstantSDNode *N101C = dyn_cast(N10.getOperand(1)); // Also allow 0xFFFF since the bits will be shifted out. This is needed // for X86. if (!N101C || (N101C->getZExtValue() != 0xFF00 && N101C->getZExtValue() != 0xFFFF)) return SDValue(); N10 = N10.getOperand(0); LookPassAnd1 = true; } if (N00 != N10) return SDValue(); // Make sure everything beyond the low halfword gets set to zero since the SRL // 16 will clear the top bits. unsigned OpSizeInBits = VT.getSizeInBits(); if (OpSizeInBits > 16) { // If the left-shift isn't masked out then the only way this is a bswap is // if all bits beyond the low 8 are 0. In that case the entire pattern // reduces to a left shift anyway: leave it for other parts of the combiner. if (DemandHighBits && !LookPassAnd0) return SDValue(); // However, if the right shift isn't masked out then it might be because // it's not needed. See if we can spot that too. If the high bits aren't // demanded, we only need bits 23:16 to be zero. Otherwise, we need all // upper bits to be zero. if (!LookPassAnd1) { unsigned HighBit = DemandHighBits ? OpSizeInBits : 24; if (!DAG.MaskedValueIsZero(N10, APInt::getBitsSet(OpSizeInBits, 16, HighBit))) return SDValue(); } } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); if (OpSizeInBits > 16) { SDLoc DL(N); Res = DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(OpSizeInBits - 16, DL, getShiftAmountTy(VT))); } return Res; } /// Return true if the specified node is an element that makes up a 32-bit /// packed halfword byteswap. 
/// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { if (!N->hasOneUse()) return false; unsigned Opc = N.getOpcode(); if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) return false; SDValue N0 = N.getOperand(0); unsigned Opc0 = N0.getOpcode(); if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL) return false; ConstantSDNode *N1C = nullptr; // SHL or SRL: look upstream for AND mask operand if (Opc == ISD::AND) N1C = dyn_cast(N.getOperand(1)); else if (Opc0 == ISD::AND) N1C = dyn_cast(N0.getOperand(1)); if (!N1C) return false; unsigned MaskByteOffset; switch (N1C->getZExtValue()) { default: return false; case 0xFF: MaskByteOffset = 0; break; case 0xFF00: MaskByteOffset = 1; break; case 0xFFFF: // In case demanded bits didn't clear the bits that will be shifted out. // This is needed for X86. if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) { MaskByteOffset = 1; break; } return false; case 0xFF0000: MaskByteOffset = 2; break; case 0xFF000000: MaskByteOffset = 3; break; } // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). if (Opc == ISD::AND) { if (MaskByteOffset == 0 || MaskByteOffset == 2) { // (x >> 8) & 0xff // (x >> 8) & 0xff0000 if (Opc0 != ISD::SRL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } else { // (x << 8) & 0xff00 // (x << 8) & 0xff000000 if (Opc0 != ISD::SHL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } } else if (Opc == ISD::SHL) { // (x & 0xff) << 8 // (x & 0xff0000) << 8 if (MaskByteOffset != 0 && MaskByteOffset != 2) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } else { // Opc == ISD::SRL // (x & 0xff00) >> 8 // (x & 0xff000000) >> 8 if (MaskByteOffset != 1 && MaskByteOffset != 3) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } if (Parts[MaskByteOffset]) return false; Parts[MaskByteOffset] = N0.getOperand(0).getNode(); return true; } // Match 2 elements of a packed halfword bswap. 
static bool isBSwapHWordPair(SDValue N, MutableArrayRef Parts) { if (N.getOpcode() == ISD::OR) return isBSwapHWordElement(N.getOperand(0), Parts) && isBSwapHWordElement(N.getOperand(1), Parts); if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); if (!C || C->getAPIntValue() != 16) return false; Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); return true; } return false; } // Match this pattern: // (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff)) // And rewrite this to: // (rotr (bswap A), 16) static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy) { assert(N->getOpcode() == ISD::OR && VT == MVT::i32 && "MatchBSwapHWordOrAndAnd: expecting i32"); if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) return SDValue(); if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND) return SDValue(); // TODO: this is too restrictive; lifting this restriction requires more tests if (!N0->hasOneUse() || !N1->hasOneUse()) return SDValue(); ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1)); ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1)); if (!Mask0 || !Mask1) return SDValue(); if (Mask0->getAPIntValue() != 0xff00ff00 || Mask1->getAPIntValue() != 0x00ff00ff) return SDValue(); SDValue Shift0 = N0.getOperand(0); SDValue Shift1 = N1.getOperand(0); if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL) return SDValue(); ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1)); ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1)); if (!ShiftAmt0 || !ShiftAmt1) return SDValue(); if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8) return SDValue(); if (Shift0.getOperand(0) != Shift1.getOperand(0)) return SDValue(); SDLoc DL(N); SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0)); SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy); return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); } /// Match a 32-bit packed halfword bswap. That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) /// => (rotl (bswap x), 16) SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!LegalOperations) return SDValue(); EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT, getShiftAmountTy(VT))) return BSwap; // Try again with commuted operands. if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT, getShiftAmountTy(VT))) return BSwap; // Look for either // (or (bswaphpair), (bswaphpair)) // (or (or (bswaphpair), (and)), (and)) // (or (or (and), (bswaphpair)), (and)) SDNode *Parts[4] = {}; if (isBSwapHWordPair(N0, Parts)) { // (or (or (and), (and)), (or (and), (and))) if (!isBSwapHWordPair(N1, Parts)) return SDValue(); } else if (N0.getOpcode() == ISD::OR) { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); } else { return SDValue(); } // Make sure the parts are all coming from the same node. 
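// Standalone illustration of matchBSwapHWordOrAndAnd's target pattern (sample
// value 0xAABBCCDD, not part of the original code): byte-swapping each 16-bit
// half equals a full byte swap rotated by 16. With bswap(0xAABBCCDD) ==
// 0xDDCCBBAA, both sides come out to 0xBBAADDCC:
static_assert((((0xAABBCCDDu << 8) & 0xFF00FF00u) |
               ((0xAABBCCDDu >> 8) & 0x00FF00FFu)) ==
                  ((0xDDCCBBAAu >> 16) | (0xDDCCBBAAu << 16)),
              "((a << 8) & 0xff00ff00) | ((a >> 8) & 0x00ff00ff) == rotr(bswap a, 16)");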
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) return SDValue(); SDLoc DL(N); SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, SDValue(Parts[0], 0)); // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); return DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); } /// This contains all DAGCombine rules which reduce two values combined by /// an Or operation to a single value \see visitANDLike(). SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { EVT VT = N1.getValueType(); SDLoc DL(N); // fold (or x, undef) -> -1 if (!LegalOperations && (N0.isUndef() || N1.isUndef())) return DAG.getAllOnesConstant(DL, VT); if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL)) return V; // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && // Don't increase # computations. (N0->hasOneUse() || N1->hasOneUse())) { // We can only do this xform if we know that bits from X that are set in C2 // but not in C1 are already zero. Likewise for Y. if (const ConstantSDNode *N0O1C = getAsNonOpaqueConstant(N0.getOperand(1))) { if (const ConstantSDNode *N1O1C = getAsNonOpaqueConstant(N1.getOperand(1))) { // We can only do this xform if we know that bits from X that are set in // C2 but not in C1 are already zero. Likewise for Y. const APInt &LHSMask = N0O1C->getAPIntValue(); const APInt &RHSMask = N1O1C->getAPIntValue(); if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1.getOperand(0)); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(LHSMask | RHSMask, DL, VT)); } } } } // (or (and X, M), (and X, N)) -> (and X, (or M, N)) if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && N0.getOperand(0) == N1.getOperand(0) && // Don't increase # computations. (N0->hasOneUse() || N1->hasOneUse())) { SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(1), N1.getOperand(1)); return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X); } return SDValue(); } /// OR combines for which the commuted variant will be tried as well. static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { EVT VT = N0.getValueType(); if (N0.getOpcode() == ISD::AND) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) // TODO: Set AllowUndefs = true. 
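// Standalone illustration of the fold named just above (sample constants, not
// part of the original code): any bit of Y cleared by the AND-with-not is
// re-set by the outer OR, so the inner mask is redundant.
static_assert(((0x12345678u & ~0x0000FFFFu) | 0x0000FFFFu) ==
                  (0x12345678u | 0x0000FFFFu),
              "(X & ~Y) | Y == X | Y");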
if (getBitwiseNotOperand(N01, N00, /* AllowUndefs */ false) == N1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1); // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) if (getBitwiseNotOperand(N00, N01, /* AllowUndefs */ false) == N1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); } if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; auto peekThroughZext = [](SDValue V) { if (V->getOpcode() == ISD::ZERO_EXTEND) return V->getOperand(0); return V; }; // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL && N0.getOperand(0) == N1.getOperand(0) && peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) return N0; // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL && N0.getOperand(1) == N1.getOperand(0) && peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) return N0; return SDValue(); } SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); // x | x --> x if (N0 == N1) return N0; // fold (or c1, c2) -> c1|c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (or x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; // fold (or x, -1) -> -1, vector edition if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting type / shuffle is legal. auto *SV0 = dyn_cast(N0); auto *SV1 = dyn_cast(N1); if (SV0 && SV1 && TLI.isTypeLegal(VT)) { bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); // Ensure both shuffles have a zero input. if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) { assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); bool CanFold = true; int NumElts = VT.getVectorNumElements(); SmallVector Mask(NumElts, -1); for (int i = 0; i != NumElts; ++i) { int M0 = SV0->getMaskElt(i); int M1 = SV1->getMaskElt(i); // Determine if either index is pointing to a zero vector. bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts)); bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts)); // If one element is zero and the otherside is undef, keep undef. // This also handles the case that both are undef. if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) continue; // Make sure only one of the elements is zero. if (M0Zero == M1Zero) { CanFold = false; break; } assert((M0 >= 0 || M1 >= 0) && "Undef index!"); // We have a zero and non-zero element. If the non-zero came from // SV0 make the index a LHS index. If it came from SV1, make it // a RHS index. We need to mod by NumElts because we don't care // which operand it came from in the original shuffles. Mask[i] = M1Zero ? 
M0 % NumElts : (M1 % NumElts) + NumElts; } if (CanFold) { SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); SDValue LegalShuffle = TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask, DAG); if (LegalShuffle) return LegalShuffle; } } } } // fold (or x, 0) -> x if (isNullConstant(N1)) return N0; // fold (or x, -1) -> -1 if (isAllOnesConstant(N1)) return N1; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // fold (or x, c) -> c iff (x & ~c) == 0 ConstantSDNode *N1C = dyn_cast(N1); if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; if (SDValue Combined = visitORLike(N0, N1, N)) return Combined; if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) return Combined; // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; // reassociate or if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) != 0 or c1/c2 are undef. auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) { return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue()); }; if (N0.getOpcode() == ISD::AND && N0->hasOneUse() && ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) { if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, {N1, N0.getOperand(1)})) { SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(IOR.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR); } } if (SDValue Combined = visitORCommutative(DAG, N0, N1, N)) return Combined; if (SDValue Combined = visitORCommutative(DAG, N1, N0, N)) return Combined; // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; // See if this is some rotate idiom. if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N))) return Rot; if (SDValue Load = MatchLoadCombine(N)) return Load; // Simplify the operands using demanded-bits information. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // If OR can be rewritten into ADD, try combines based on ADD. if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && DAG.haveNoCommonBitsSet(N0, N1)) if (SDValue Combined = visitADDLike(N)) return Combined; return SDValue(); } static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { if (Op.getOpcode() == ISD::AND && DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); return Op.getOperand(0); } return Op; } /// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask) { Op = stripConstantMask(DAG, Op, Mask); if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { Shift = Op; return true; } return false; } /// Helper function for visitOR to extract the needed side of a rotate idiom /// from a shl/srl/mul/udiv. This is meant to handle cases where /// InstCombine merged some outside op with one of the shifts from /// the rotate pattern. /// \returns An empty \c SDValue if the needed shift couldn't be extracted. 
/// Otherwise, returns an expansion of \p ExtractFrom based on the following /// patterns: /// /// (or (add v v) (shrl v bitwidth-1)): /// expands (add v v) -> (shl v 1) /// /// (or (mul v c0) (shrl (mul v c1) c2)): /// expands (mul v c0) -> (shl (mul v c1) c3) /// /// (or (udiv v c0) (shl (udiv v c1) c2)): /// expands (udiv v c0) -> (shrl (udiv v c1) c3) /// /// (or (shl v c0) (shrl (shl v c1) c2)): /// expands (shl v c0) -> (shl (shl v c1) c3) /// /// (or (shrl v c0) (shl (shrl v c1) c2)): /// expands (shrl v c0) -> (shrl (shrl v c1) c3) /// /// Such that in all cases, c3+c2==bitwidth(op v c1). static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL) { assert(OppShift && ExtractFrom && "Empty SDValue"); - assert( - (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) && - "Existing shift must be valid as a rotate half"); + if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL) + return SDValue(); ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); // Value and Type of the shift. SDValue OppShiftLHS = OppShift.getOperand(0); EVT ShiftedVT = OppShiftLHS.getValueType(); // Amount of the existing shift. ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // (add v v) -> (shl v 1) // TODO: Should this be a general DAG canonicalization? if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && ExtractFrom.getOpcode() == ISD::ADD && ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && ExtractFrom.getOperand(0) == OppShiftLHS && OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1) return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS, DAG.getShiftAmountConstant(1, ShiftedVT, DL)); // Preconditions: // (or (op0 v c0) (shiftl/r (op0 v c1) c2)) // // Find opcode of the needed shift to be extracted from (op0 v c0). unsigned Opcode = ISD::DELETED_NODE; bool IsMulOrDiv = false; // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift // opcode or its arithmetic (mul or udiv) variant. auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) { IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant; if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift) return false; Opcode = NeededShift; return true; }; // op0 must be either the needed shift opcode or the mul/udiv equivalent // that the needed shift can be extracted from. if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) && (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV))) return SDValue(); // op0 must be the same opcode on both sides, have the same LHS argument, // and produce the same value type. if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || ShiftedVT != ExtractFrom.getValueType()) return SDValue(); // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. ConstantSDNode *ExtractFromCst = isConstOrConstSplat(ExtractFrom.getOperand(1)); // TODO: We should be able to handle non-uniform constant vectors for these values // Check that we have constant values. 
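// Standalone illustration of the expansion performed here (sample constants,
// not part of the original code): a multiply or unsigned divide by c0 can hide
// a shift when c0 == c1 << c3, since v * (c1 << c3) == (v * c1) << c3 and
// v / (c1 << c3) == (v / c1) >> c3 for unsigned v.
static_assert((0x1234u * 48u) == ((0x1234u * 3u) << 4), "mul hides a left shift");
static_assert((1000u / 12u) == ((1000u / 3u) >> 2), "udiv hides a right shift");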
if (!OppShiftCst || !OppShiftCst->getAPIntValue() || !OppLHSCst || !OppLHSCst->getAPIntValue() || !ExtractFromCst || !ExtractFromCst->getAPIntValue()) return SDValue(); // Compute the shift amount we need to extract to complete the rotate. const unsigned VTWidth = ShiftedVT.getScalarSizeInBits(); if (OppShiftCst->getAPIntValue().ugt(VTWidth)) return SDValue(); APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue(); // Normalize the bitwidth of the two mul/udiv/shift constant operands. APInt ExtractFromAmt = ExtractFromCst->getAPIntValue(); APInt OppLHSAmt = OppLHSCst->getAPIntValue(); zeroExtendToMatch(ExtractFromAmt, OppLHSAmt); // Now try extract the needed shift from the ExtractFrom op and see if the // result matches up with the existing shift's LHS op. if (IsMulOrDiv) { // Op to extract from is a mul or udiv by a constant. // Check: // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(), NeededShiftAmt.getZExtValue()); APInt ResultAmt; APInt Rem; APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem); if (Rem != 0 || ResultAmt != OppLHSAmt) return SDValue(); } else { // Op to extract from is a shift by a constant. // Check: // c2 - (bitwidth(op0 v c0) - c1) == c0 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc( ExtractFromAmt.getBitWidth())) return SDValue(); } // Return the expanded shift op that should allow a rotate to be formed. EVT ShiftVT = OppShift.getOperand(1).getValueType(); EVT ResVT = ExtractFrom.getValueType(); SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT); return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode); } // Return true if we can prove that, whenever Neg and Pos are both in the // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. // // The IsRotate flag should be set when the LHS of both shifts is the same. // Otherwise if matching a general funnel shift, it should be clear. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate) { const auto &TLI = DAG.getTargetLoweringInfo(); // If EltSize is a power of 2 then: // // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). // // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check // for the stronger condition: // // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] // // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) // we can just replace Neg with Neg' for the rest of the function. // // In other cases we check for the even stronger condition: // // Neg == EltSize - Pos [B] // // for all Neg and Pos. Note that the (or ...) then invokes undefined // behavior if Pos == 0 (and consequently Neg == EltSize). // // We could actually use [A] whenever EltSize is a power of 2, but the // only extra cases that it would match are those uninteresting ones // where Neg and Pos are never in range at the same time. E.g. 
for // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) // as well as (sub 32, Pos), but: // // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) // // always invokes undefined behavior for 32-bit X. // // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. // This allows us to peek through any operations that only affect Mask's // un-demanded bits. // // NOTE: We can only do this when matching operations which won't modify the // least Log2(EltSize) significant bits and not a general funnel shift. unsigned MaskLoBits = 0; if (IsRotate && isPowerOf2_64(EltSize)) { unsigned Bits = Log2_64(EltSize); unsigned NegBits = Neg.getScalarValueSizeInBits(); if (NegBits >= Bits) { APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits); if (SDValue Inner = TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) { Neg = Inner; MaskLoBits = Bits; } } } // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) return false; ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) return false; SDValue NegOp1 = Neg.getOperand(1); // On the RHS of [A], if Pos is the result of operation on Pos' that won't // affect Mask's demanded bits, just replace Pos with Pos'. These operations // are redundant for the purpose of the equality. if (MaskLoBits) { unsigned PosBits = Pos.getScalarValueSizeInBits(); if (PosBits >= MaskLoBits) { APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits); if (SDValue Inner = TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) { Pos = Inner; } } } // The condition we need is now: // // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask // // If NegOp1 == Pos then we need: // // EltSize & Mask == NegC & Mask // // (because "x & Mask" is a truncation and distributes through subtraction). // // We also need to account for a potential truncation of NegOp1 if the amount // has already been legalized to a shift amount type. APInt Width; if ((Pos == NegOp1) || (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0))) Width = NegC->getAPIntValue(); // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. // Then the condition we want to prove becomes: // // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask // // which, again because "x & Mask" is a truncation, becomes: // // NegC & Mask == (EltSize - PosC) & Mask // EltSize & Mask == (NegC + PosC) & Mask else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) Width = PosC->getAPIntValue() + NegC->getAPIntValue(); else return false; } else return false; // Now we just need to check that EltSize & Mask == Width & Mask. if (MaskLoBits) // EltSize & Mask is 0 since Mask is EltSize - 1. return Width.getLoBits(MaskLoBits) == 0; return Width == EltSize; } // A subroutine of MatchRotate used once we have found an OR of two opposite // shifts of Shifted. If Neg == - Pos then the OR reduces // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. 
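// Standalone illustration of the modular reasoning in matchRotateSub (sample
// amounts, not part of the original code): for a power-of-two element size,
// only the low log2(EltSize) bits of the amount matter, so EltSize - y and
// 0 - y describe the same rotate amount.
static_assert(((32u - 9u) & 31u) == ((0u - 9u) & 31u),
              "(EltSize - y) and (-y) agree modulo EltSize");
static_assert(((32u - 1u) & 31u) == ((0u - 1u) & 31u),
              "(EltSize - y) and (-y) agree modulo EltSize");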
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { // fold (or (shl x, (*ext y)), // (srl x, (*ext (sub 32, y)))) -> // (rotl x, y) or (rotr x, (sub 32, y)) // // fold (or (shl x, (*ext (sub 32, y))), // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG, /*IsRotate*/ true)) { return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg); } return SDValue(); } // A subroutine of MatchRotate used once we have found an OR of two opposite // shifts of N0 + N1. If Neg == - Pos then the OR reduces // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. // TODO: Merge with MatchRotatePosNeg. SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { EVT VT = N0.getValueType(); unsigned EltBits = VT.getScalarSizeInBits(); // fold (or (shl x0, (*ext y)), // (srl x1, (*ext (sub 32, y)))) -> // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y)) // // fold (or (shl x0, (*ext (sub 32, y))), // (srl x1, (*ext y))) -> // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) { return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, HasPos ? Pos : Neg); } // Matching the shift+xor cases, we can't easily use the xor'd shift amount // so for now just use the PosOpcode case if its legal. // TODO: When can we use the NegOpcode case? if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) { auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) { if (Op.getOpcode() != BinOpc) return false; ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1)); return Cst && (Cst->getAPIntValue() == Imm); }; // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31))) // -> (fshl x0, x1, y) if (IsBinOpImm(N1, ISD::SRL, 1) && IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) && InnerPos == InnerNeg.getOperand(0) && TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) { return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos); } // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y)) // -> (fshr x0, x1, y) if (IsBinOpImm(N0, ISD::SHL, 1) && IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) && InnerNeg == InnerPos.getOperand(0) && TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) { return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg); } // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y)) // -> (fshr x0, x1, y) // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization? if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) && IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) && InnerNeg == InnerPos.getOperand(0) && TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) { return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg); } } return SDValue(); } // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. This also matches funnel shift patterns, similar to rotation but // with different shifted sources. 
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { EVT VT = LHS.getValueType(); // The target must have at least one rotate/funnel flavor. // We still try to match rotate by constant pre-legalization. // TODO: Support pre-legalization funnel-shift by constant. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); bool HasFSHL = hasOperation(ISD::FSHL, VT); bool HasFSHR = hasOperation(ISD::FSHR, VT); // If the type is going to be promoted and the target has enabled custom // lowering for rotate, allow matching rotate by non-constants. Only allow // this for scalar types. if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypePromoteInteger) { HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom; HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom; } if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); // Check for truncated rotate. if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { assert(LHS.getValueType() == RHS.getValueType()); if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot); } } // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. matchRotateHalf(DAG, LHS, LHSShift, LHSMask); SDValue RHSShift; // The shift. SDValue RHSMask; // AND value if any. matchRotateHalf(DAG, RHS, RHSShift, RHSMask); // If neither side matched a rotate half, bail if (!LHSShift && !RHSShift) return SDValue(); // InstCombine may have combined a constant shl, srl, mul, or udiv with one // side of the rotate, so try to handle that here. In all cases we need to // pass the matched shift from the opposite side to compute the opcode and // needed shift amount to extract. We still want to do this if both sides // matched a rotate half because one half may be a potential overshift that // can be broken down (ie if InstCombine merged two shl or srl ops into a // single one). // Have LHS side of the rotate, try to extract the needed shift from the RHS. if (LHSShift) if (SDValue NewRHSShift = extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL)) RHSShift = NewRHSShift; // Have RHS side of the rotate, try to extract the needed shift from the LHS. if (RHSShift) if (SDValue NewLHSShift = extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL)) LHSShift = NewLHSShift; // If a side is still missing, nothing else we can do. if (!RHSShift || !LHSShift) return SDValue(); // At this point we've matched or extracted a shift op on each side. if (LHSShift.getOpcode() == RHSShift.getOpcode()) return SDValue(); // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); std::swap(LHSShift, RHSShift); std::swap(LHSMask, RHSMask); } // Something has gone wrong - we've lost the shl/srl pair - bail. 
if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL) return SDValue(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; }; auto ApplyMasks = [&](SDValue Res) { // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); SDValue Mask = AllOnes; if (LHSMask.getNode()) { SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt); Mask = DAG.getNode(ISD::AND, DL, VT, Mask, DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits)); } if (RHSMask.getNode()) { SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt); Mask = DAG.getNode(ISD::AND, DL, VT, Mask, DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits)); } Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask); } return Res; }; // TODO: Support pre-legalization funnel-shift by constant. bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); if (!IsRotate && !(HasFSHL || HasFSHR)) { if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() && ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { // Look for a disguised rotate by constant. // The common shifted operand X may be hidden inside another 'or'. SDValue X, Y; auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) { if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR) return false; if (CommonOp == Or.getOperand(0)) { X = CommonOp; Y = Or.getOperand(1); return true; } if (CommonOp == Or.getOperand(1)) { X = CommonOp; Y = Or.getOperand(0); return true; } return false; }; SDValue Res; if (matchOr(LHSShiftArg, RHSShiftArg)) { // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1) SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt); SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt); Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY); } else if (matchOr(RHSShiftArg, LHSShiftArg)) { // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2) SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt); SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt); Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY); } else { return SDValue(); } return ApplyMasks(Res); } return SDValue(); // Requires funnel shift support. } // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) // iff C1+C2 == EltSizeInBits if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Res; if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { bool UseROTL = !LegalOperations || HasROTL; Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, UseROTL ? LHSShiftAmt : RHSShiftAmt); } else { bool UseFSHL = !LegalOperations || HasFSHL; Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); } return ApplyMasks(Res); } // Even pre-legalization, we can't easily rotate/funnel-shift by a variable // shift. 
if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) return SDValue(); // If the shift amount is sign/zext/any-extended just peel it off. SDValue LExtOp0 = LHSShiftAmt; SDValue RExtOp0 = RHSShiftAmt; if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { LExtOp0 = LHSShiftAmt.getOperand(0); RExtOp0 = RHSShiftAmt.getOperand(0); } if (IsRotate && (HasROTL || HasROTR)) { SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL); if (TryL) return TryL; SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL); if (TryR) return TryR; } SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL); if (TryL) return TryL; SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL); if (TryR) return TryR; return SDValue(); } namespace { /// Represents known origin of an individual byte in load combine pattern. The /// value of the byte is either constant zero or comes from memory. struct ByteProvider { // For constant zero providers Load is set to nullptr. For memory providers // Load represents the node which loads the byte from memory. // ByteOffset is the offset of the byte in the value produced by the load. LoadSDNode *Load = nullptr; unsigned ByteOffset = 0; ByteProvider() = default; static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) { return ByteProvider(Load, ByteOffset); } static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); } bool isConstantZero() const { return !Load; } bool isMemory() const { return Load; } bool operator==(const ByteProvider &Other) const { return Other.Load == Load && Other.ByteOffset == ByteOffset; } private: ByteProvider(LoadSDNode *Load, unsigned ByteOffset) : Load(Load), ByteOffset(ByteOffset) {} }; } // end anonymous namespace /// Recursively traverses the expression calculating the origin of the requested /// byte of the given value. Returns None if the provider can't be calculated. /// /// For all the values except the root of the expression verifies that the value /// has exactly one use and if it's not true return None. This way if the origin /// of the byte is returned it's guaranteed that the values which contribute to /// the byte are not used outside of this expression. /// /// Because the parts of the expression are not allowed to have more than one /// use this function iterates over trees, not DAGs. So it never visits the same /// node more than once. 
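///
/// For example, for i32 V = (or (shl (zext i8 L1), 8), (zext i8 L0)) where L0
/// and L1 are single-byte loads, byte 0 of V is provided by L0, byte 1 by L1,
/// and bytes 2 and 3 are constant zero.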
static const Optional calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, bool Root = false) { // Typical i64 by i8 pattern requires recursion up to 8 calls depth if (Depth == 10) return None; if (!Root && !Op.hasOneUse()) return None; assert(Op.getValueType().isScalarInteger() && "can't handle other types"); unsigned BitWidth = Op.getValueSizeInBits(); if (BitWidth % 8 != 0) return None; unsigned ByteWidth = BitWidth / 8; assert(Index < ByteWidth && "invalid index requested"); (void) ByteWidth; switch (Op.getOpcode()) { case ISD::OR: { auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1); if (!LHS) return None; auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1); if (!RHS) return None; if (LHS->isConstantZero()) return RHS; if (RHS->isConstantZero()) return LHS; return None; } case ISD::SHL: { auto ShiftOp = dyn_cast(Op->getOperand(1)); if (!ShiftOp) return None; uint64_t BitShift = ShiftOp->getZExtValue(); if (BitShift % 8 != 0) return None; uint64_t ByteShift = BitShift / 8; return Index < ByteShift ? ByteProvider::getConstantZero() : calculateByteProvider(Op->getOperand(0), Index - ByteShift, Depth + 1); } case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: { SDValue NarrowOp = Op->getOperand(0); unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); if (NarrowBitWidth % 8 != 0) return None; uint64_t NarrowByteWidth = NarrowBitWidth / 8; if (Index >= NarrowByteWidth) return Op.getOpcode() == ISD::ZERO_EXTEND ? Optional(ByteProvider::getConstantZero()) : None; return calculateByteProvider(NarrowOp, Index, Depth + 1); } case ISD::BSWAP: return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1, Depth + 1); case ISD::LOAD: { auto L = cast(Op.getNode()); if (!L->isSimple() || L->isIndexed()) return None; unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); if (NarrowBitWidth % 8 != 0) return None; uint64_t NarrowByteWidth = NarrowBitWidth / 8; if (Index >= NarrowByteWidth) return L->getExtensionType() == ISD::ZEXTLOAD ? Optional(ByteProvider::getConstantZero()) : None; return ByteProvider::getMemory(L, Index); } } return None; } static unsigned littleEndianByteAt(unsigned BW, unsigned i) { return i; } static unsigned bigEndianByteAt(unsigned BW, unsigned i) { return BW - i - 1; } // Check if the bytes offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little // endian, and None if match failed. static Optional isBigEndian(const ArrayRef ByteOffsets, int64_t FirstOffset) { // The endian can be decided only when it is 2 bytes at least. unsigned Width = ByteOffsets.size(); if (Width < 2) return None; bool BigEndian = true, LittleEndian = true; for (unsigned i = 0; i < Width; i++) { int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i); BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i); if (!BigEndian && !LittleEndian) return None; } assert((BigEndian != LittleEndian) && "It should be either big endian or" "little endian"); return BigEndian; } static SDValue stripTruncAndExt(SDValue Value) { switch (Value.getOpcode()) { case ISD::TRUNCATE: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::ANY_EXTEND: return stripTruncAndExt(Value.getOperand(0)); } return Value; } /// Match a pattern where a wide type scalar value is stored by several narrow /// stores. Fold it into a single store or a BSWAP and a store if the targets /// supports it. 
/// /// Assuming little endian target: /// i8 *p = ... /// i32 val = ... /// p[0] = (val >> 0) & 0xFF; /// p[1] = (val >> 8) & 0xFF; /// p[2] = (val >> 16) & 0xFF; /// p[3] = (val >> 24) & 0xFF; /// => /// *((i32)p) = val; /// /// i8 *p = ... /// i32 val = ... /// p[0] = (val >> 24) & 0xFF; /// p[1] = (val >> 16) & 0xFF; /// p[2] = (val >> 8) & 0xFF; /// p[3] = (val >> 0) & 0xFF; /// => /// *((i32)p) = BSWAP(val); SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // The matching looks for "store (trunc x)" patterns that appear early but are // likely to be replaced by truncating store nodes during combining. // TODO: If there is evidence that running this later would help, this // limitation could be removed. Legality checks may need to be added // for the created store and optional bswap/rotate. if (LegalOperations || OptLevel == CodeGenOpt::None) return SDValue(); // We only handle merging simple stores of 1-4 bytes. // TODO: Allow unordered atomics when wider type is legal (see D66309) EVT MemVT = N->getMemoryVT(); if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || !N->isSimple() || N->isIndexed()) return SDValue(); // Collect all of the stores in the chain. SDValue Chain = N->getChain(); SmallVector Stores = {N}; while (auto *Store = dyn_cast(Chain)) { // All stores must be the same size to ensure that we are writing all of the // bytes in the wide value. // TODO: We could allow multiple sizes by tracking each stored byte. if (Store->getMemoryVT() != MemVT || !Store->isSimple() || Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); } // There is no reason to continue if we do not have at least a pair of stores. if (Stores.size() < 2) return SDValue(); // Handle simple types only. LLVMContext &Context = *DAG.getContext(); unsigned NumStores = Stores.size(); unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits(); unsigned WideNumBits = NumStores * NarrowNumBits; EVT WideVT = EVT::getIntegerVT(Context, WideNumBits); if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64) return SDValue(); // Check if all bytes of the source value that we are looking at are stored // to the same base address. Collect offsets from Base address into OffsetMap. SDValue SourceValue; SmallVector OffsetMap(NumStores, INT64_MAX); int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; Optional Base; for (auto *Store : Stores) { // All the stores store different parts of the CombinedValue. A truncate is // required to get the partial value. SDValue Trunc = Store->getValue(); if (Trunc.getOpcode() != ISD::TRUNCATE) return SDValue(); // Other than the first/last part, a shift operation is required to get the // offset. int64_t Offset = 0; SDValue WideVal = Trunc.getOperand(0); if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) && isa(WideVal.getOperand(1))) { // The shift amount must be a constant multiple of the narrow type. // It is translated to the offset address in the wide source value "y". // // x = srl y, ShiftAmtC // i8 z = trunc x // store z, ... uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1); if (ShiftAmtC % NarrowNumBits != 0) return SDValue(); Offset = ShiftAmtC / NarrowNumBits; WideVal = WideVal.getOperand(0); } // Stores must share the same source value with different offsets. // Truncate and extends should be stripped to get the single source value. 
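    // For example, "store i8 (trunc y), p" and "store i8 (trunc (srl y, 8)),
    // p+1" both decompose to the source value y, with offsets 0 and 1
    // respectively.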
if (!SourceValue) SourceValue = WideVal; else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal)) return SDValue(); else if (SourceValue.getValueType() != WideVT) { if (WideVal.getValueType() == WideVT || WideVal.getScalarValueSizeInBits() > SourceValue.getScalarValueSizeInBits()) SourceValue = WideVal; // Give up if the source value type is smaller than the store size. if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits()) return SDValue(); } // Stores must share the same base address. BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG); int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) return SDValue(); // Remember the first store. if (ByteOffsetFromBase < FirstOffset) { FirstStore = Store; FirstOffset = ByteOffsetFromBase; } // Map the offset in the store and the offset in the combined value, and // early return if it has been set before. if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX) return SDValue(); OffsetMap[Offset] = ByteOffsetFromBase; } assert(FirstOffset != INT64_MAX && "First byte offset must be set"); assert(FirstStore && "First store must be set"); // Check that a store of the wide type is both allowed and fast on the target const DataLayout &Layout = DAG.getDataLayout(); bool Fast = false; bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT, *FirstStore->getMemOperand(), &Fast); if (!Allowed || !Fast) return SDValue(); // Check if the pieces of the value are going to the expected places in memory // to merge the stores. auto checkOffsets = [&](bool MatchLittleEndian) { if (MatchLittleEndian) { for (unsigned i = 0; i != NumStores; ++i) if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset) return false; } else { // MatchBigEndian by reversing loop counter. for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j) if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset) return false; } return true; }; // Check if the offsets line up for the native data layout of this target. bool NeedBswap = false; bool NeedRotate = false; if (!checkOffsets(Layout.isLittleEndian())) { // Special-case: check if byte offsets line up for the opposite endian. if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian())) NeedBswap = true; else if (NumStores == 2 && checkOffsets(Layout.isBigEndian())) NeedRotate = true; else return SDValue(); } SDLoc DL(N); if (WideVT != SourceValue.getValueType()) { assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits && "Unexpected store value to merge"); SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue); } // Before legalize we can introduce illegal bswaps/rotates which will be later // converted to an explicit bswap sequence. This way we end up with a single // store and byte shuffling instead of several stores and byte shuffling. if (NeedBswap) { SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue); } else if (NeedRotate) { assert(WideNumBits % 2 == 0 && "Unexpected type for rotate"); SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT); SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt); } SDValue NewStore = DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(), FirstStore->getPointerInfo(), FirstStore->getAlign()); // Rely on other DAG combine rules to remove the other individual stores. 
DAG.ReplaceAllUsesWith(N, NewStore.getNode()); return NewStore; } /// Match a pattern where a wide type scalar value is loaded by several narrow /// loads and combined by shifts and ors. Fold it into a single load or a load /// and a BSWAP if the targets supports it. /// /// Assuming little endian target: /// i8 *a = ... /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) /// => /// i32 val = *((i32)a) /// /// i8 *a = ... /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] /// => /// i32 val = BSWAP(*((i32)a)) /// /// TODO: This rule matches complex patterns with OR node roots and doesn't /// interact well with the worklist mechanism. When a part of the pattern is /// updated (e.g. one of the loads) its direct users are put into the worklist, /// but the root node of the pattern which triggers the load combine is not /// necessarily a direct user of the changed node. For example, once the address /// of t28 load is reassociated load combine won't be triggered: /// t25: i32 = add t4, Constant:i32<2> /// t26: i64 = sign_extend t25 /// t27: i64 = add t2, t26 /// t28: i8,ch = load t0, t27, undef:i64 /// t29: i32 = zero_extend t28 /// t32: i32 = shl t29, Constant:i8<8> /// t33: i32 = or t23, t32 /// As a possible fix visitLoad can check if the load can be a part of a load /// combine pattern and add corresponding OR roots to the worklist. SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { assert(N->getOpcode() == ISD::OR && "Can only match load combining against OR nodes"); // Handles simple types only EVT VT = N->getValueType(0); if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); unsigned ByteWidth = VT.getSizeInBits() / 8; bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); auto MemoryByteOffset = [&] (ByteProvider P) { assert(P.isMemory() && "Must be a memory byte provider"); unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits(); assert(LoadBitWidth % 8 == 0 && "can only analyze providers for individual bytes not bit"); unsigned LoadByteWidth = LoadBitWidth / 8; return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.ByteOffset) : littleEndianByteAt(LoadByteWidth, P.ByteOffset); }; Optional Base; SDValue Chain; SmallPtrSet Loads; Optional FirstByteProvider; int64_t FirstOffset = INT64_MAX; // Check if all the bytes of the OR we are looking at are loaded from the same // base address. Collect bytes offsets from Base address in ByteOffsets. SmallVector ByteOffsets(ByteWidth); unsigned ZeroExtendedBytes = 0; for (int i = ByteWidth - 1; i >= 0; --i) { auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); if (!P) return SDValue(); if (P->isConstantZero()) { // It's OK for the N most significant bytes to be 0, we can just // zero-extend the load. 
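      // For example, an i32 value computed as a[0] | (a[1] << 8) has its two
      // most significant bytes known zero and can be formed with a 16-bit
      // zero-extending load.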
if (++ZeroExtendedBytes != (ByteWidth - static_cast(i))) return SDValue(); continue; } assert(P->isMemory() && "provenance should either be memory or zero"); LoadSDNode *L = P->Load; assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && "Must be enforced by calculateByteProvider"); assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); // All loads must share the same chain SDValue LChain = L->getChain(); if (!Chain) Chain = LChain; else if (Chain != LChain) return SDValue(); // Loads must share the same base address BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) return SDValue(); // Calculate the offset of the current byte from the base address ByteOffsetFromBase += MemoryByteOffset(*P); ByteOffsets[i] = ByteOffsetFromBase; // Remember the first byte load if (ByteOffsetFromBase < FirstOffset) { FirstByteProvider = P; FirstOffset = ByteOffsetFromBase; } Loads.insert(L); } assert(!Loads.empty() && "All the bytes of the value must be loaded from " "memory, so there must be at least one load which produces the value"); assert(Base && "Base address of the accessed memory location must be set"); assert(FirstOffset != INT64_MAX && "First byte offset must be set"); bool NeedsZext = ZeroExtendedBytes > 0; EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8); if (!MemVT.isSimple()) return SDValue(); // Before legalize we can introduce too wide illegal loads which will be later // split into legal sized loads. This enables us to combine i64 load by i8 // patterns to a couple of i32 loads on 32 bit targets. if (LegalOperations && !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, MemVT)) return SDValue(); // Check if the bytes of the OR we are looking at match with either big or // little endian value load Optional IsBigEndian = isBigEndian( makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); if (!IsBigEndian) return SDValue(); assert(FirstByteProvider && "must be set"); // Ensure that the first byte is loaded from zero offset of the first load. // So the combined value can be loaded from the first load address. if (MemoryByteOffset(*FirstByteProvider) != 0) return SDValue(); LoadSDNode *FirstLoad = FirstByteProvider->Load; // The node we are looking at matches with the pattern, check if we can // replace it with a single (possibly zero-extended) load and bswap + shift if // needed. // If the load needs byte swap check if the target supports it bool NeedsBswap = IsBigEndianTarget != *IsBigEndian; // Before legalize we can introduce illegal bswaps which will be later // converted to an explicit bswap sequence. This way we end up with a single // load and byte shuffling instead of several loads and byte shuffling. // We do not introduce illegal bswaps when zero-extending as this tends to // introduce too many arithmetic instructions. if (NeedsBswap && (LegalOperations || NeedsZext) && !TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); // If we need to bswap and zero extend, we have to insert a shift. Check that // it is legal. 
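  // For example, on a little-endian target, an i64 value computed as
  // (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] is formed as
  // (bswap i64 (shl (zextload i32 a), 32)); the extra shift is why SHL
  // legality is checked here.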
if (NeedsBswap && NeedsZext && LegalOperations && !TLI.isOperationLegal(ISD::SHL, VT)) return SDValue(); // Check that a load of the wide type is both allowed and fast on the target bool Fast = false; bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, *FirstLoad->getMemOperand(), &Fast); if (!Allowed || !Fast) return SDValue(); SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT, Chain, FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign()); // Transfer chain users from old loads to the new load. for (LoadSDNode *L : Loads) DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); if (!NeedsBswap) return NewLoad; SDValue ShiftedLoad = NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT, SDLoc(N), LegalOperations)) : NewLoad; return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); } // If the target has andn, bsl, or a similar bit-select instruction, // we want to unfold masked merge, with canonical pattern of: // | A | |B| // ((x ^ y) & m) ^ y // | D | // Into: // (x & m) | (y & ~m) // If y is a constant, m is not a 'not', and the 'andn' does not work with // immediates, we unfold into a different pattern: // ~(~x & m) & (m | y) // If x is a constant, m is a 'not', and the 'andn' does not work with // immediates, we unfold into a different pattern: // (x | ~m) & ~(~m & ~y) // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at // the very least that breaks andnpd / andnps patterns, and because those // patterns are simplified in IR and shouldn't be created in the DAG SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { assert(N->getOpcode() == ISD::XOR); // Don't touch 'not' (i.e. where y = -1). if (isAllOnesOrAllOnesSplat(N->getOperand(1))) return SDValue(); EVT VT = N->getValueType(0); // There are 3 commutable operators in the pattern, // so we have to deal with 8 possible variants of the basic pattern. SDValue X, Y, M; auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) { if (And.getOpcode() != ISD::AND || !And.hasOneUse()) return false; SDValue Xor = And.getOperand(XorIdx); if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) return false; SDValue Xor0 = Xor.getOperand(0); SDValue Xor1 = Xor.getOperand(1); // Don't touch 'not' (i.e. where y = -1). if (isAllOnesOrAllOnesSplat(Xor1)) return false; if (Other == Xor0) std::swap(Xor0, Xor1); if (Other != Xor1) return false; X = Xor0; Y = Xor1; M = And.getOperand(XorIdx ? 0 : 1); return true; }; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) && !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0)) return SDValue(); // Don't do anything if the mask is constant. This should not be reachable. // InstCombine should have already unfolded this pattern, and DAGCombiner // probably shouldn't produce it, too. if (isa(M.getNode())) return SDValue(); // We can transform if the target has AndNot if (!TLI.hasAndNot(M)) return SDValue(); SDLoc DL(N); // If Y is a constant, check that 'andn' works with immediates. Unless M is // a bitwise not that would already allow ANDN to be used. if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) { assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); // If not, we need to do a bit more work to make sure andn is still used. 
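    // The replacement below computes ~(~x & m) & (m | y): bits where m is set
    // yield x and bits where m is clear yield y, which is exactly
    // (x & m) | (y & ~m).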
SDValue NotX = DAG.getNOT(DL, X, VT); SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M); SDValue NotLHS = DAG.getNOT(DL, LHS, VT); SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y); return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); } // If X is a constant and M is a bitwise not, check that 'andn' works with // immediates. if (!TLI.hasAndNot(X) && isBitwiseNot(M)) { assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable."); // If not, we need to do a bit more work to make sure andn is still used. SDValue NotM = M.getOperand(0); SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM); SDValue NotY = DAG.getNOT(DL, Y, VT); SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY); SDValue NotRHS = DAG.getNOT(DL, RHS, VT); return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS); } SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); SDValue NotM = DAG.getNOT(DL, M, VT); SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); } SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // fold (xor undef, undef) -> 0. This is a common idiom (misuse). if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, DL, VT); // fold (xor x, undef) -> undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; // fold (xor c1, c2) -> c1^c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (xor x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; } // fold (xor x, 0) -> x if (isNullConstant(N1)) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // reassociate xor if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; // look for 'add-like' folds: // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE) if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && isMinSignedConstant(N1)) if (SDValue Combined = visitADDLike(N)) return Combined; // fold !(x cc y) -> (x !cc y) unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1) && isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), LHS.getValueType()); if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { switch (N0Opcode) { default: llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2), N0.getOperand(3), NotCC); case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { if (N0.hasOneUse()) { // FIXME Can we handle multiple uses? Could we token factor the chain // results from the new/old setcc? SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS); CombineTo(N, SetCC); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); recursivelyDeleteUnusedNodes(N0.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} break; } } } } // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); SDLoc DL0(N0); V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V, DAG.getConstant(1, DL0, V.getValueType())); AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants if (isAllOnesConstant(N1) && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); if (isa(N01) || isa(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } // fold (not (neg x)) -> (add X, -1) // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if // Y is a constant or the subtract has a single use. if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) { return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), DAG.getAllOnesConstant(DL, VT)); } // fold (not (add X, -1)) -> (neg X) if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD && isAllOnesOrAllOnesSplat(N0.getOperand(1))) { return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0.getOperand(0)); } // fold (xor (and x, y), y) -> (and (not x), y) if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) { SDValue X = N0.getOperand(0); SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); AddToWorklist(NotX.getNode()); return DAG.getNode(ISD::AND, DL, VT, NotX, N1); } // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { SDValue A = N0Opcode == ISD::ADD ? N0 : N1; SDValue S = N0Opcode == ISD::SRA ? N0 : N1; if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); SDValue S0 = S.getOperand(0); if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1)) return DAG.getNode(ISD::ABS, DL, VT, S0); } } // fold (xor x, x) -> 0 if (N0 == N1) return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); // fold (xor (shl 1, x), -1) -> (rotl ~1, x) // Here is a concrete example of this equivalence: // i16 x == 14 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 // // => // // i16 ~1 == 0b1111111111111110 // i16 rol(~1, 14) == 0b1011111111111111 // // Some additional tips to help conceptualize this transform: // - Try to see the operation as placing a single zero in a value of all ones. 
// - There exists no value for x which would allow the result to contain zero. // - Values of x larger than the bitwidth are undefined and do not require a // consistent result. // - Pushing the zero left requires shifting one bits in from the right. // A rotate left of ~1 is a nice way of achieving the desired result. if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), N0.getOperand(1)); } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0Opcode == N1.getOpcode()) if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) return R; // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) return MM; // Simplify the expression using non-local knowledge. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) return Combined; return SDValue(); } /// If we have a shift-by-constant of a bitwise logic op that itself has a /// shift-by-constant operand with identical opcode, we may be able to convert /// that into 2 independent shifts followed by the logic op. This is a /// throughput improvement. static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { // Match a one-use bitwise logic op. SDValue LogicOp = Shift->getOperand(0); if (!LogicOp.hasOneUse()) return SDValue(); unsigned LogicOpcode = LogicOp.getOpcode(); if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR && LogicOpcode != ISD::XOR) return SDValue(); // Find a matching one-use shift by constant. unsigned ShiftOpcode = Shift->getOpcode(); SDValue C1 = Shift->getOperand(1); ConstantSDNode *C1Node = isConstOrConstSplat(C1); assert(C1Node && "Expected a shift with constant operand"); const APInt &C1Val = C1Node->getAPIntValue(); auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp, const APInt *&ShiftAmtVal) { if (V.getOpcode() != ShiftOpcode || !V.hasOneUse()) return false; ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1)); if (!ShiftCNode) return false; // Capture the shifted operand and shift amount value. ShiftOp = V.getOperand(0); ShiftAmtVal = &ShiftCNode->getAPIntValue(); // Shift amount types do not have to match their operand type, so check that // the constants are the same width. if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth()) return false; // The fold is not valid if the sum of the shift values exceeds bitwidth. if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits())) return false; return true; }; // Logic ops are commutative, so check each operand for a match. 
SDValue X, Y; const APInt *C0Val; if (matchFirstShift(LogicOp.getOperand(0), X, C0Val)) Y = LogicOp.getOperand(1); else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val)) Y = LogicOp.getOperand(0); else return SDValue(); // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1) SDLoc DL(Shift); EVT VT = Shift->getValueType(0); EVT ShiftAmtVT = Shift->getOperand(1).getValueType(); SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT); SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC); SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1); return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2); } /// Handle transforms common to the three shifts, when the shift amount is a /// constant. /// We are looking for: (shift being one of shl/sra/srl) /// shift (binop X, C0), C1 /// And want to transform into: /// binop (shift X, C1), (shift C0, C1) SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand"); // Do not turn a 'not' into a regular xor. if (isBitwiseNot(N->getOperand(0))) return SDValue(); // The inner binop must be one-use, since we want to replace it. SDValue LHS = N->getOperand(0); if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) return SDValue(); // TODO: This is limited to early combining because it may reveal regressions // otherwise. But since we just checked a target hook to see if this is // desirable, that should have filtered out cases where this interferes // with some other pattern matching. if (!LegalTypes) if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) return R; // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. This sort of // thing happens with address calculations, so it's important to canonicalize // it. switch (LHS.getOpcode()) { default: return SDValue(); case ISD::OR: case ISD::XOR: case ISD::AND: break; case ISD::ADD: if (N->getOpcode() != ISD::SHL) return SDValue(); // only shl(add) not sr[al](add). break; } // We require the RHS of the binop to be a constant and not opaque as well. ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1)); if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant // or is copy/select. Enable this in other cases when figure out it's exactly // profitable. SDValue BinOpLHSVal = LHS.getOperand(0); bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL || BinOpLHSVal.getOpcode() == ISD::SRA || BinOpLHSVal.getOpcode() == ISD::SRL) && isa(BinOpLHSVal.getOperand(1)); bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg || BinOpLHSVal.getOpcode() == ISD::SELECT; if (!IsShiftByConstant && !IsCopyOrSelect) return SDValue(); if (IsCopyOrSelect && N->hasOneUse()) return SDValue(); // Fold the constants, shifting the binop RHS by the shift amount. 
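  // For example, (shl (or X, 240), 4) becomes (or (shl X, 4), 3840), since a
  // left shift distributes over the bitwise op.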
SDLoc DL(N); EVT VT = N->getValueType(0); SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1), N->getOperand(1)); assert(isa(NewRHS) && "Folding was not successful!"); SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), N->getOperand(1)); return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { assert(N->getOpcode() == ISD::TRUNCATE); assert(N->getOperand(0).getOpcode() == ISD::AND); // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) EVT TruncVT = N->getValueType(0); if (N->hasOneUse() && N->getOperand(0).hasOneUse() && TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) { SDValue N01 = N->getOperand(0).getOperand(1); if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) { SDLoc DL(N); SDValue N00 = N->getOperand(0).getOperand(0); SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00); SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01); AddToWorklist(Trunc00.getNode()); AddToWorklist(Trunc01.getNode()); return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01); } } return SDValue(); } SDValue DAGCombiner::visitRotate(SDNode *N) { SDLoc dl(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); unsigned Bitsize = VT.getScalarSizeInBits(); // fold (rot x, 0) -> x if (isNullOrNullSplat(N1)) return N0; // fold (rot x, c) -> x iff (c % BitSize) == 0 if (isPowerOf2_32(Bitsize) && Bitsize > 1) { APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1); if (DAG.MaskedValueIsZero(N1, ModuloMask)) return N0; } // fold (rot x, c) -> (rot x, c % BitSize) bool OutOfRange = false; auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) { OutOfRange |= C->getAPIntValue().uge(Bitsize); return true; }; if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) { EVT AmtVT = N1.getValueType(); SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT); if (SDValue Amt = DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits})) return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt); } // rot i16 X, 8 --> bswap X auto *RotAmtC = isConstOrConstSplat(N1); if (RotAmtC && RotAmtC->getAPIntValue() == 8 && VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT)) return DAG.getNode(ISD::BSWAP, dl, VT, N0); // Simplify the operands using demanded-bits information. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1); } unsigned NextOp = N0.getOpcode(); // fold (rot* (rot* x, c2), c1) // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize) if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) { EVT ShiftVT = C1->getValueType(0); bool SameSide = (N->getOpcode() == NextOp); unsigned CombineOp = SameSide ? 
ISD::ADD : ISD::SUB; SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, {N1, BitsizeC}); SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, {N0.getOperand(1), BitsizeC}); if (Norm1 && Norm2) if (SDValue CombinedShift = DAG.FoldConstantArithmetic( CombineOp, dl, ShiftVT, {Norm1, Norm2})) { SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC}); return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm); } } } return SDValue(); } SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (SDValue V = DAG.simplifyShift(N0, N1)) return V; EVT VT = N0.getValueType(); EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold (shl c1, c2) -> c1<(N1); // If setcc produces all-one true value then: // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<isConstant()) { if (N0.getOpcode() == ISD::AND) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); BuildVectorSDNode *N01CV = dyn_cast(N01); if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1})) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } } } if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) if (N0.getOpcode() == ISD::SHL) { auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).uge(OpSizeInBits); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) return DAG.getConstant(0, SDLoc(N), VT); auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).ult(OpSizeInBits); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum); } } // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2)) // For this to be valid, the second form must not preserve any of the bits // that are shifted out by the inner shift in the first form. This means // the outer shift size must be >= the number of bits added by the ext. // As a corollary, we don't care what kind of ext it is. 
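  // For example, (shl (zext i8 (shl x, 2) to i32), 24) is equivalent to
  // (shl (zext i8 x to i32), 26): the two bits of x dropped by the inner i8
  // shift would also be shifted out past bit 31 by the combined shift of 26.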
if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) && N0.getOperand(0).getOpcode() == ISD::SHL) { SDValue N0Op0 = N0.getOperand(0); SDValue InnerShiftAmt = N0Op0.getOperand(1); EVT InnerVT = N0Op0.getValueType(); uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits(); auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return c2.uge(OpSizeInBits - InnerBitwidth) && (c1 + c2).uge(OpSizeInBits); }; if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) return DAG.getConstant(0, SDLoc(N), VT); auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return c2.uge(OpSizeInBits - InnerBitwidth) && (c1 + c2).ult(OpSizeInBits); }; if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { SDLoc DL(N); SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0)); SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT); Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1); return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum); } } // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) // Only fold this if the inner zext has no other uses to avoid increasing // the total number of instructions. if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::SRL) { SDValue N0Op0 = N0.getOperand(0); SDValue InnerShiftAmt = N0Op0.getOperand(1); auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2); return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2); }; if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { SDLoc DL(N); EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType(); SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT); NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL); AddToWorklist(NewSHL.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); } } if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) { auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { const APInt &LHSC = LHS->getAPIntValue(); const APInt &RHSC = RHS->getAPIntValue(); return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && LHSC.getZExtValue() <= RHSC.getZExtValue(); }; SDLoc DL(N); // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2 if (N0->getFlags().hasExact()) { if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); } if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); return 
DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff); } } // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, // folding this will increase the total number of instructions. if (N0.getOpcode() == ISD::SRL && (N0.getOperand(1) == N1 || N0.hasOneUse()) && TLI.shouldFoldConstantShiftPairToMask(N, Level)) { if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); SDValue Mask = DAG.getAllOnesConstant(DL, VT); Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01); Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff); SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); } if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); SDValue Mask = DAG.getAllOnesConstant(DL, VT); Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1); SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); } } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) && isConstantOrConstantVector(N1, /* No Opaques */ true)) { SDLoc DL(N); SDValue AllBits = DAG.getAllOnesConstant(DL, VT); SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1); return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && N0->hasOneUse() && isConstantOrConstantVector(N1, /* No Opaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) && TLI.isDesirableToCommuteWithShift(N, Level)) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); AddToWorklist(Shl0.getNode()); AddToWorklist(Shl1.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) { SDValue N01 = N0.getOperand(1); if (SDValue Shl = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); } ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && !N1C->isOpaque()) if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)). if (N0.getOpcode() == ISD::VSCALE) if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) { const APInt &C0 = N0.getConstantOperandAPInt(0); const APInt &C1 = NC1->getAPIntValue(); return DAG.getVScale(SDLoc(N), VT, C0 << C1); } // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)). 
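  // For example, (shl (step_vector 2), (splat 3)) becomes (step_vector 16),
  // since lane i holds 2 * i and (2 * i) << 3 == 16 * i.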
APInt ShlVal; if (N0.getOpcode() == ISD::STEP_VECTOR) if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) { const APInt &C0 = N0.getConstantOperandAPInt(0); if (ShlVal.ult(C0.getBitWidth())) { APInt NewStep = C0 << ShlVal; return DAG.getStepVector(SDLoc(N), VT, NewStep); } } return SDValue(); } // Transform a right shift of a multiply into a multiply-high. // Examples: // (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b) // (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b) static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "SRL or SRA node is required here!"); // Check the shift amount. Proceed with the transformation if the shift // amount is constant. ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1)); if (!ShiftAmtSrc) return SDValue(); SDLoc DL(N); // The operation feeding into the shift must be a multiply. SDValue ShiftOperand = N->getOperand(0); if (ShiftOperand.getOpcode() != ISD::MUL) return SDValue(); // Both operands must be equivalent extend nodes. SDValue LeftOp = ShiftOperand.getOperand(0); SDValue RightOp = ShiftOperand.getOperand(1); bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; if (!IsSignExt && !IsZeroExt) return SDValue(); EVT NarrowVT = LeftOp.getOperand(0).getValueType(); unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); SDValue MulhRightOp; if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { unsigned ActiveBits = IsSignExt ? Constant->getAPIntValue().getMinSignedBits() : Constant->getAPIntValue().getActiveBits(); if (ActiveBits > NarrowVTSize) return SDValue(); MulhRightOp = DAG.getConstant( Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL, NarrowVT); } else { if (LeftOp.getOpcode() != RightOp.getOpcode()) return SDValue(); // Check that the two extend nodes are the same type. if (NarrowVT != RightOp.getOperand(0).getValueType()) return SDValue(); MulhRightOp = RightOp.getOperand(0); } EVT WideVT = LeftOp.getValueType(); // Proceed with the transformation if the wide types match. assert((WideVT == RightOp.getValueType()) && "Cannot have a multiply node with two different operand types."); // Proceed with the transformation if the wide type is twice as large // as the narrow type. if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize) return SDValue(); // Check the shift amount with the narrow type size. // Proceed with the transformation if the shift amount is the width // of the narrow type. unsigned ShiftAmt = ShiftAmtSrc->getZExtValue(); if (ShiftAmt != NarrowVTSize) return SDValue(); // If the operation feeding into the MUL is a sign extend (sext), // we use mulhs. Othewise, zero extends (zext) use mulhu. unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU; // Combine to mulh if mulh is legal/custom for the narrow type on the target. if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT)) return SDValue(); SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp); return (N->getOpcode() == ISD::SRA ? 
DAG.getSExtOrTrunc(Result, DL, WideVT) : DAG.getZExtOrTrunc(Result, DL, WideVT)); } SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (SDValue V = DAG.simplifyShift(N0, N1)) return V; EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold (sra c1, c2) -> (sra c1, c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1})) return C; // Arithmetic shifting an all-sign-bit value is a no-op. // fold (sra 0, x) -> 0 // fold (sra -1, x) -> -1 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) return N0; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports // sext_inreg. ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); if (VT.isVector()) ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, VT.getVectorElementCount()); if (!LegalOperations || TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == TargetLowering::Legal) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), DAG.getValueType(ExtVT)); // Even if we can't convert to sext_inreg, we might be able to remove // this shift pair if the input is already sign extended. if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue()) return N0.getOperand(0); } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) // clamp (add c1, c2) to max shift. if (N0.getOpcode() == ISD::SRA) { SDLoc DL(N); EVT ShiftVT = N1.getValueType(); EVT ShiftSVT = ShiftVT.getScalarType(); SmallVector ShiftValues; auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); APInt Sum = c1 + c2; unsigned ShiftSum = Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue(); ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT)); return true; }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) { SDValue ShiftValue; if (N1.getOpcode() == ISD::BUILD_VECTOR) ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues); else if (N1.getOpcode() == ISD::SPLAT_VECTOR) { assert(ShiftValues.size() == 1 && "Expected matchBinaryPredicate to return one element for " "SPLAT_VECTORs"); ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]); } else ShiftValue = ShiftValues[0]; return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue); } } // fold (sra (shl X, m), (sub result_size, n)) // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for // result_size - n != m. // If truncate is free for the target sext(shl) is likely to result in better // code. if (N0.getOpcode() == ISD::SHL && N1C) { // Get the two constanst of the shifts, CN0 = m, CN = n. const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); if (N01C) { LLVMContext &Ctx = *DAG.getContext(); // Determine what the truncate's result bitsize and type would be. EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); if (VT.isVector()) TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount()); // Determine the residual right-shift amount. 
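// Illustrative instance on i64 with m == 40 and a shift amount of 48:
//   (sra (shl X, 40), 48)
// yields bits [23:8] of X, sign-extended from bit 23. The replacement
//   (sext (trunc i16 (srl X, 8)))
// produces the same value: the residual shift is 48 - 40 == 8 and the
// truncation type is i(64 - 48) == i16.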
int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal // on that type, and the truncate to that type is both legal and free, // perform the transform. if ((ShiftAmt > 0) && TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { SDLoc DL(N); SDValue Amt = DAG.getConstant(ShiftAmt, DL, getShiftAmountTy(N0.getOperand(0).getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Amt); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Shift); return DAG.getNode(ISD::SIGN_EXTEND, DL, N->getValueType(0), Trunc); } } } // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. // sra (add (shl X, N1C), AddC), N1C --> // sext (add (trunc X to (width - N1C)), AddC') // sra (sub AddC, (shl X, N1C)), N1C --> // sext (sub AddC1',(trunc X to (width - N1C))) if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C && N0.hasOneUse()) { bool IsAdd = N0.getOpcode() == ISD::ADD; SDValue Shl = N0.getOperand(IsAdd ? 0 : 1); if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 && Shl.hasOneUse()) { // TODO: AddC does not need to be a splat. if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) { // Determine what the truncate's type would be and ask the target if // that is a free operation. LLVMContext &Ctx = *DAG.getContext(); unsigned ShiftAmt = N1C->getZExtValue(); EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt); if (VT.isVector()) TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount()); // TODO: The simple type check probably belongs in the default hook // implementation and/or target-specific overrides (because // non-simple types likely require masking when legalized), but // that restriction may conflict with other transforms. if (TruncVT.isSimple() && isTypeLegal(TruncVT) && TLI.isTruncateFree(VT, TruncVT)) { SDLoc DL(N); SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc( TruncVT.getScalarSizeInBits()), DL, TruncVT); SDValue Add; if (IsAdd) Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC); else Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc); return DAG.getSExtOrTrunc(Add, DL, VT); } } } } // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); } // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2)) // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) // if c1 is equal to the number of bits the trunc removes // TODO - support non-uniform vector shift amounts. 
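// Illustrative instance with x : i64:
//   (sra (trunc i32 (sra x, 32)), 5) --> (trunc i32 (sra x, 37))
// The inner shift amount (32) matches the 32 bits dropped by the truncate,
// so both forms produce bits [63:37] of x, sign-extended to 32 bits.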
if (N0.getOpcode() == ISD::TRUNCATE && (N0.getOperand(0).getOpcode() == ISD::SRL || N0.getOperand(0).getOpcode() == ISD::SRA) && N0.getOperand(0).hasOneUse() && N0.getOperand(0).getOperand(1).hasOneUse() && N1C) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { EVT LargeVT = N0Op0.getValueType(); unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits; if (LargeShift->getAPIntValue() == TruncBits) { SDLoc DL(N); EVT LargeShiftVT = getShiftAmountTy(LargeVT); SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT); Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt, DAG.getConstant(TruncBits, DL, LargeShiftVT)); SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); } } } // Simplify, based on bits shifted out of the LHS. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // If the sign bit is known to be zero, switch this to a SRL. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C && !N1C->isOpaque()) if (SDValue NewSRA = visitShiftByConstant(N)) return NewSRA; // Try to transform this shift into a multiply-high if // it matches the appropriate pattern detected in combineShiftToMULH. if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) return MULH; // Attempt to convert a sra of a load into a narrower sign-extending load. if (SDValue NarrowLoad = reduceLoadWidth(N)) return NarrowLoad; return SDValue(); } SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (SDValue V = DAG.simplifyShift(N0, N1)) return V; EVT VT = N0.getValueType(); EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // if (srl x, c) is known to be zero, return 0 ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N0.getOpcode() == ISD::SRL) { auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).uge(OpSizeInBits); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) return DAG.getConstant(0, SDLoc(N), VT); auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).ult(OpSizeInBits); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum); } } if (N1C && N0.getOpcode() == ISD::TRUNCATE && N0.getOperand(0).getOpcode() == ISD::SRL) { SDValue InnerShift = N0.getOperand(0); // TODO - support non-uniform vector shift amounts. 
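// Illustrative instance of the fold below, with x : i64:
//   (srl (trunc i32 (srl x, 32)), 8) --> (trunc i32 (srl x, 40))
// This is valid because 32 (inner shift) + 32 (result width) equals the
// inner width; both forms leave bits [63:40] of x in the low 24 bits and
// zero everything above them.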
if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) { uint64_t c1 = N001C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = InnerShift.getValueType(); EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2))) // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { SDLoc DL(N); if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, DL, VT); SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, InnerShift.getOperand(0), NewShiftAmt); return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift); } // In the more general case, we can clear the high bits after the shift: // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask) if (N0.hasOneUse() && InnerShift.hasOneUse() && c1 + c2 < InnerShiftSize) { SDLoc DL(N); SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, InnerShift.getOperand(0), NewShiftAmt); SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize, OpSizeInBits - c2), DL, InnerShiftVT); SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask); return DAG.getNode(ISD::TRUNCATE, DL, VT, And); } } } // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or // (and (srl x, (sub c2, c1), MASK) if (N0.getOpcode() == ISD::SHL && (N0.getOperand(1) == N1 || N0->hasOneUse()) && TLI.shouldFoldConstantShiftPairToMask(N, Level)) { auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { const APInt &LHSC = LHS->getAPIntValue(); const APInt &RHSC = RHS->getAPIntValue(); return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && LHSC.getZExtValue() <= RHSC.getZExtValue(); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { SDLoc DL(N); SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); SDValue Mask = DAG.getAllOnesConstant(DL, VT); Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01); Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff); SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); } if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { SDLoc DL(N); SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); SDValue Mask = DAG.getAllOnesConstant(DL, VT); Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1); SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); } } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) // TODO - support non-uniform vector shift amounts. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? 
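// Illustrative case: if x is i16 any-extended to i32 and the shift amount
// is >= 16, every defined bit of x is shifted out and only the undefined
// upper bits remain, so the whole node folds to undef (checked just below).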
EVT SmallVT = N0.getOperand(0).getValueType(); unsigned BitSize = SmallVT.getScalarSizeInBits(); if (N1C->getAPIntValue().uge(BitSize)) return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); SDLoc DL0(N0); SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0), DAG.getConstant(ShiftAmt, DL0, getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt); SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), DAG.getConstant(Mask, DL, VT)); } } // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign // bit, which is unmodified by sra. if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) { if (N0.getOpcode() == ISD::SRA) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { KnownBits Known = DAG.computeKnownBits(N0.getOperand(0)); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. APInt UnknownBits = ~Known.Zero; if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. if (UnknownBits.isPowerOf2()) { // Okay, we know that only that the single bit specified by UnknownBits // could be set on input to the CTLZ node. If this bit is set, the SRL // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair // to an SRL/XOR pair, which is likely to simplify more. unsigned ShAmt = UnknownBits.countTrailingZeros(); SDValue Op = N0.getOperand(0); if (ShAmt) { SDLoc DL(N0); Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(ShAmt, DL, getShiftAmountTy(Op.getValueType()))); AddToWorklist(Op.getNode()); } SDLoc DL(N); return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT)); } } // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } // fold operands of srl based on knowledge that the low bits are not // demanded. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); if (N1C && !N1C->isOpaque()) if (SDValue NewSRL = visitShiftByConstant(N)) return NewSRL; // Attempt to convert a srl of a load into a narrower zero-extending load. if (SDValue NarrowLoad = reduceLoadWidth(N)) return NarrowLoad; // Here is a common situation. We want to optimize: // // %a = ... // %b = and i32 %a, 2 // %c = srl i32 %b, 1 // brcond i32 %c ... // // into // // %a = ... // %b = and %a, 2 // %c = setcc eq %b, 0 // brcond %c ... // // However when after the source operand of SRL is optimized into AND, the SRL // itself may not be optimized further. Look for it and add the BRCOND into // the worklist. 
if (N->hasOneUse()) { SDNode *Use = *N->use_begin(); if (Use->getOpcode() == ISD::BRCOND) AddToWorklist(Use); else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { // Also look pass the truncate. Use = *Use->use_begin(); if (Use->getOpcode() == ISD::BRCOND) AddToWorklist(Use); } } // Try to transform this shift into a multiply-high if // it matches the appropriate pattern detected in combineShiftToMULH. if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) return MULH; return SDValue(); } SDValue DAGCombiner::visitFunnelShift(SDNode *N) { EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); bool IsFSHL = N->getOpcode() == ISD::FSHL; unsigned BitWidth = VT.getScalarSizeInBits(); // fold (fshl N0, N1, 0) -> N0 // fold (fshr N0, N1, 0) -> N1 if (isPowerOf2_32(BitWidth)) if (DAG.MaskedValueIsZero( N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1))) return IsFSHL ? N0 : N1; auto IsUndefOrZero = [](SDValue V) { return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true); }; // TODO - support non-uniform vector shift amounts. if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) { EVT ShAmtTy = N2.getValueType(); // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) if (Cst->getAPIntValue().uge(BitWidth)) { uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth); return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1, DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy)); } unsigned ShAmt = Cst->getZExtValue(); if (ShAmt == 0) return IsFSHL ? N0 : N1; // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C) // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C) // fold fshl(N0, undef_or_zero, C) -> shl(N0, C) // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C) if (IsUndefOrZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, SDLoc(N), ShAmtTy)); if (IsUndefOrZero(N1)) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, SDLoc(N), ShAmtTy)); // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive. // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive. // TODO - bigendian support once we have test coverage. // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine? // TODO - permit LHS EXTLOAD if extensions are shifted out. if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() && !DAG.getDataLayout().isBigEndian()) { auto *LHS = dyn_cast(N0); auto *RHS = dyn_cast(N1); if (LHS && RHS && LHS->isSimple() && RHS->isSimple() && LHS->getAddressSpace() == RHS->getAddressSpace() && (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) && ISD::isNON_EXTLoad(LHS)) { if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) { SDLoc DL(RHS); uint64_t PtrOff = IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8); Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff); bool Fast = false; if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, RHS->getAddressSpace(), NewAlign, RHS->getMemOperand()->getFlags(), &Fast) && Fast) { SDValue NewPtr = DAG.getMemBasePlusOffset( RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL); AddToWorklist(NewPtr.getNode()); SDValue Load = DAG.getLoad( VT, DL, RHS->getChain(), NewPtr, RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign, RHS->getMemOperand()->getFlags(), RHS->getAAInfo()); // Replace the old load's chain with the new load's chain. 
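// The new, wider load must take over the old load's chain result so that
// any memory operations ordered after the original load stay ordered after
// the combined load; otherwise the dead load could linger in the chain.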
WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1)); return Load; } } } } } // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2) // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2) // iff We know the shift amount is in range. // TODO: when is it worth doing SUB(BW, N2) as well? if (isPowerOf2_32(BitWidth)) { APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1); if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2); if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits)) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2); } // fold (fshl N0, N0, N2) -> (rotl N0, N2) // fold (fshr N0, N0, N2) -> (rotr N0, N2) // TODO: Investigate flipping this rotate if only one is legal, if funnel shift // is legal as well we might be better off avoiding non-constant (BW - N2). unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR; if (N0 == N1 && hasOperation(RotOpc, VT)) return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2); // Simplify, based on bits shifted out of N0/N1. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } SDValue DAGCombiner::visitSHLSAT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (SDValue V = DAG.simplifyShift(N0, N1)) return V; EVT VT = N0.getValueType(); // fold (*shlsat c1, c2) -> c1<getOpcode(), SDLoc(N), VT, {N0, N1})) return C; ConstantSDNode *N1C = isConstOrConstSplat(N1); if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) { // fold (sshlsat x, c) -> (shl x, c) if (N->getOpcode() == ISD::SSHLSAT && N1C && N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0))) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1); // fold (ushlsat x, c) -> (shl x, c) if (N->getOpcode() == ISD::USHLSAT && N1C && N1C->getAPIntValue().ule( DAG.computeKnownBits(N0).countMinLeadingZeros())) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1); } return SDValue(); } // Given a ABS node, detect the following pattern: // (ABS (SUB (EXTEND a), (EXTEND b))). // Generates UABD/SABD instruction. static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { SDValue AbsOp1 = N->getOperand(0); SDValue Op0, Op1; if (AbsOp1.getOpcode() != ISD::SUB) return SDValue(); Op0 = AbsOp1.getOperand(0); Op1 = AbsOp1.getOperand(1); unsigned Opc0 = Op0.getOpcode(); // Check if the operands of the sub are (zero|sign)-extended. if (Opc0 != Op1.getOpcode() || (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) return SDValue(); EVT VT = N->getValueType(0); EVT VT1 = Op0.getOperand(0).getValueType(); EVT VT2 = Op1.getOperand(0).getValueType(); unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; // fold abs(sext(x) - sext(y)) -> zext(abds(x, y)) // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y)) // NOTE: Extensions must be equivalent. 
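// Illustrative instance with i8 operands widened to i32:
//   abs (sub (zext i8 10), (zext i8 250)) == |10 - 250| == 240
//   zext (abdu (i8 10, i8 250))           == zext 240    == 240
// so the narrow ABDU followed by a zero extend is equivalent.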
if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) { Op0 = Op0.getOperand(0); Op1 = Op1.getOperand(0); SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD); } // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y)) // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y)) if (TLI.isOperationLegalOrCustom(ABDOpcode, VT)) return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1); return SDValue(); } SDValue DAGCombiner::visitABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (abs c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0); // fold (abs (abs x)) -> (abs x) if (N0.getOpcode() == ISD::ABS) return N0; // fold (abs x) -> x iff not-negative if (DAG.SignBitIsZero(N0)) return N0; if (SDValue ABD = combineABSToABD(N, DAG, TLI)) return ABD; return SDValue(); } SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (bswap c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::BSWAP, DL, VT, N0); // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) return N0.getOperand(0); // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse // isn't supported, it will be expanded to bswap followed by a manual reversal // of bits in each byte. By placing bswaps before bitreverse, we can remove // the two bswaps if the bitreverse gets expanded. if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) { SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap); } // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2)))))) // iff x >= bw/2 (i.e. lower half is known zero) unsigned BW = VT.getScalarSizeInBits(); if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) { auto *ShAmt = dyn_cast(N0.getOperand(1)); EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2); if (ShAmt && ShAmt->getAPIntValue().ult(BW) && ShAmt->getZExtValue() >= (BW / 2) && (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) && TLI.isTruncateFree(VT, HalfVT) && (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) { SDValue Res = N0.getOperand(0); if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2))) Res = DAG.getNode(ISD::SHL, DL, VT, Res, DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT))); Res = DAG.getZExtOrTrunc(Res, DL, HalfVT); Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res); return DAG.getZExtOrTrunc(Res, DL, VT); } } // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as // inverse-shift-of-bswap: // bswap (X u<< C) --> (bswap X) u>> C // bswap (X u>> C) --> (bswap X) u<< C if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && N0.hasOneUse()) { auto *ShAmt = dyn_cast(N0.getOperand(1)); if (ShAmt && ShAmt->getAPIntValue().ult(BW) && ShAmt->getZExtValue() % 8 == 0) { SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0)); unsigned InverseShift = N0.getOpcode() == ISD::SHL ? 
ISD::SRL : ISD::SHL; return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1)); } } return SDValue(); } SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (bitreverse c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0); // fold (bitreverse (bitreverse x)) -> x if (N0.getOpcode() == ISD::BITREVERSE) return N0.getOperand(0); return SDValue(); } SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); // If the value is known never to be zero, switch to the undef version. if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) { if (DAG.isKnownNeverZero(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); } return SDValue(); } SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); // If the value is known never to be zero, switch to the undef version. if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { if (DAG.isKnownNeverZero(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); } return SDValue(); } SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } // FIXME: This should be checking for no signed zeros on individual operands, as // well as no nans. static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI) { const TargetOptions &Options = DAG.getTarget().Options; EVT VT = LHS.getValueType(); return Options.NoSignedZerosFPMath && VT.isFloatingPoint() && TLI.isProfitableToCombineMinNumMaxNum(VT) && DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); } /// Generate Min/Max node static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG) { if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); switch (CC) { case ISD::SETOLT: case ISD::SETOLE: case ISD::SETLT: case ISD::SETLE: case ISD::SETULT: case ISD::SETULE: { // Since it's known never nan to get here already, either fminnum or // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is // expanded in terms of it. unsigned IEEEOpcode = (LHS == True) ? 
ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT)) return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS); unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; if (TLI.isOperationLegalOrCustom(Opcode, TransformVT)) return DAG.getNode(Opcode, DL, VT, LHS, RHS); return SDValue(); } case ISD::SETOGT: case ISD::SETOGE: case ISD::SETGT: case ISD::SETGE: case ISD::SETUGT: case ISD::SETUGE: { unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT)) return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS); unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; if (TLI.isOperationLegalOrCustom(Opcode, TransformVT)) return DAG.getNode(Opcode, DL, VT, LHS, RHS); return SDValue(); } default: return SDValue(); } } /// If a (v)select has a condition value that is a sign-bit test, try to smear /// the condition operand sign-bit across the value width and use it as a mask. static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { SDValue Cond = N->getOperand(0); SDValue C1 = N->getOperand(1); SDValue C2 = N->getOperand(2); if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2)) return SDValue(); EVT VT = N->getValueType(0); if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() || VT != Cond.getOperand(0).getValueType()) return SDValue(); // The inverted-condition + commuted-select variants of these patterns are // canonicalized to these forms in IR. SDValue X = Cond.getOperand(0); SDValue CondC = Cond.getOperand(1); ISD::CondCode CC = cast(Cond.getOperand(2))->get(); if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) && isAllOnesOrAllOnesSplat(C2)) { // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1 SDLoc DL(N); SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); return DAG.getNode(ISD::OR, DL, VT, Sra, C1); } if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) { // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1 SDLoc DL(N); SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); return DAG.getNode(ISD::AND, DL, VT, Sra, C1); } return SDValue(); } SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); EVT CondVT = Cond.getValueType(); SDLoc DL(N); if (!VT.isInteger()) return SDValue(); auto *C1 = dyn_cast(N1); auto *C2 = dyn_cast(N2); if (!C1 || !C2) return SDValue(); // Only do this before legalization to avoid conflicting with target-specific // transforms in the other direction (create a select from a zext/sext). There // is also a target-independent combine here in DAGCombiner in the other // direction for (select Cond, -1, 0) when the condition is not i1. 
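// Illustrative instances of the i1 folds handled below:
//   select i1 %c, i32 5, i32 4 --> add (zext %c), 4   (constants differ by 1)
//   select i1 %c, i32 4, i32 5 --> add (sext %c), 5   (sext of true is -1)
//   select i1 %c, i32 8, i32 0 --> shl (zext %c), 3   (8 is a power of two)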
if (CondVT == MVT::i1 && !LegalOperations) { if (C1->isZero() && C2->isOne()) { // select Cond, 0, 1 --> zext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); if (VT != MVT::i1) NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); return NotCond; } if (C1->isZero() && C2->isAllOnes()) { // select Cond, 0, -1 --> sext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); if (VT != MVT::i1) NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); return NotCond; } if (C1->isOne() && C2->isZero()) { // select Cond, 1, 0 --> zext (Cond) if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return Cond; } if (C1->isAllOnes() && C2->isZero()) { // select Cond, -1, 0 --> sext (Cond) if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); return Cond; } // Use a target hook because some targets may prefer to transform in the // other direction. if (TLI.convertSelectOfConstantsToMath(VT)) { // For any constants that differ by 1, we can transform the select into an // extend and add. const APInt &C1Val = C1->getAPIntValue(); const APInt &C2Val = C2->getAPIntValue(); if (C1Val - 1 == C2Val) { // select Cond, C1, C1-1 --> add (zext Cond), C1-1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } if (C1Val + 1 == C2Val) { // select Cond, C1, C1+1 --> add (sext Cond), C1+1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) if (C1Val.isPowerOf2() && C2Val.isZero()) { if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); SDValue ShAmtC = DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); } if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) return V; } return SDValue(); } // fold (select Cond, 0, 1) -> (xor Cond, 1) // We can't do this reliably if integer based booleans have different contents // to floating point based booleans. This is because we can't tell whether we // have an integer-based boolean or a floating-point-based boolean unless we // can find the SETCC that produced it and inspect its operands. This is // fairly easy if C is the SETCC node, but it can potentially be // undiscoverable (or not reasonably discoverable). For example, it could be // in another basic block or it could require searching a complicated // expression. 
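// With ZeroOrOneBooleanContent for both kinds, Cond can only be 0 or 1,
// so (select Cond, 0, 1) is just (xor Cond, 1): Cond == 1 gives 0 and
// Cond == 0 gives 1, regardless of which kind of setcc produced Cond.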
if (CondVT.isInteger() && TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == TargetLowering::ZeroOrOneBooleanContent && TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == TargetLowering::ZeroOrOneBooleanContent && C1->isZero() && C2->isOne()) { SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); if (VT.bitsEq(CondVT)) return NotCond; return DAG.getZExtOrTrunc(NotCond, DL, VT); } return SDValue(); } static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) && "Expected a (v)select"); SDValue Cond = N->getOperand(0); SDValue T = N->getOperand(1), F = N->getOperand(2); EVT VT = N->getValueType(0); if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1) return SDValue(); // select Cond, Cond, F --> or Cond, F // select Cond, 1, F --> or Cond, F if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true)) return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F); // select Cond, T, Cond --> and Cond, T // select Cond, T, 0 --> and Cond, T if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true)) return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T); // select Cond, T, 1 --> or (not Cond), T if (isOneOrOneSplat(F, /* AllowUndefs */ true)) { SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T); } // select Cond, 0, F --> and (not Cond), F if (isNullOrNullSplat(T, /* AllowUndefs */ true)) { SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F); } return SDValue(); } static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse()) return SDValue(); SDValue Cond0 = N0.getOperand(0); SDValue Cond1 = N0.getOperand(1); ISD::CondCode CC = cast(N0.getOperand(2))->get(); if (VT != Cond0.getValueType()) return SDValue(); // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the // compare is inverted from that pattern ("Cond0 s> -1"). if (CC == ISD::SETLT && isNullOrNullSplat(Cond1)) ; // This is the pattern we are looking for. else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1)) std::swap(N1, N2); else return SDValue(); // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1 if (isNullOrNullSplat(N2)) { SDLoc DL(N); SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); return DAG.getNode(ISD::AND, DL, VT, Sra, N1); } // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2 if (isAllOnesOrAllOnesSplat(N1)) { SDLoc DL(N); SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); return DAG.getNode(ISD::OR, DL, VT, Sra, N2); } // If we have to invert the sign bit mask, only do that transform if the // target has a bitwise 'and not' instruction (the invert is free). // (Cond0 s< -0) ? 
0 : N2 --> ~(Cond0 s>> BW-1) & N2 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) { SDLoc DL(N); SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); SDValue Not = DAG.getNOT(DL, Sra, VT); return DAG.getNode(ISD::AND, DL, VT, Not, N2); } // TODO: There's another pattern in this family, but it may require // implementing hasOrNot() to check for profitability: // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2 return SDValue(); } SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); SDLoc DL(N); SDNodeFlags Flags = N->getFlags(); if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; if (SDValue V = foldSelectOfConstants(N)) return V; if (SDValue V = foldBoolSelectToLogic(N, DAG)) return V; // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. if (VT0 == MVT::i1) { // The code in this block deals with the following 2 equivalences: // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) // The target can specify its preferred form with the // shouldNormalizeToSelectSequence() callback. However we always transform // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further // optimize the combination of the conditions. bool normalizeToSequence = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); // select (and Cond0, Cond1), X, Y // -> select Cond0, (select Cond1, X, Y), Y if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags); if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, InnerSelect, N2, Flags); // Cleanup on failure. if (InnerSelect.use_empty()) recursivelyDeleteUnusedNodes(InnerSelect.getNode()); } // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags); if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1, InnerSelect, Flags); // Cleanup on failure. if (InnerSelect.use_empty()) recursivelyDeleteUnusedNodes(InnerSelect.getNode()); } // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { SDValue N1_0 = N1->getOperand(0); SDValue N1_1 = N1->getOperand(1); SDValue N1_2 = N1->getOperand(2); if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. if (!normalizeToSequence) { SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2, Flags); } // Otherwise see if we can optimize the "and" to a better pattern. 
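// visitANDLike may find a cheaper form than an explicit AND, for instance
// by folding two comparisons of the same operands into a single setcc with
// a combined predicate.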
if (SDValue Combined = visitANDLike(N0, N1_0, N)) { return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1, N2, Flags); } } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { SDValue N2_0 = N2->getOperand(0); SDValue N2_1 = N2->getOperand(1); SDValue N2_2 = N2->getOperand(2); if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2, Flags); } // Otherwise see if we can optimize to a better pattern. if (SDValue Combined = visitORLike(N0, N2_0, N)) return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, N2_2, Flags); } } } // select (not Cond), N1, N2 -> select Cond, N2, N1 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); SelectOp->setFlags(Flags); return SelectOp; } // Fold selects based on a setcc into other things, such as min/max/abs. if (N0.getOpcode() == ISD::SETCC) { SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1); ISD::CondCode CC = cast(N0.getOperand(2))->get(); // select (fcmp lt x, y), x, y -> fminnum x, y // select (fcmp gt x, y), x, y -> fmaxnum x, y // // This is OK if we don't care what happens if either operand is a NaN. if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC, TLI, DAG)) return FMinMax; // Use 'unsigned add with overflow' to optimize an unsigned saturating add. // This is conservatively limited to pre-legal-operations to give targets // a chance to reverse the transform if they want to do that. Also, it is // unlikely that the pattern would be formed late, so it's probably not // worth going through the other checks. 
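// Concrete instance with i8 and C == 10 (so ~C == 245):
//   %a = add i8 %x, 10
//   %c = icmp ugt i8 %x, 245
//   %r = select %c, -1, %a
// The compare is true exactly when %x + 10 wraps, so selecting -1 in that
// case is unsigned saturating addition, i.e. the uaddo-based form below.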
if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) && CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) && N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) { auto *C = dyn_cast(N2.getOperand(1)); auto *NotC = dyn_cast(Cond1); if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) { // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) --> // uaddo Cond0, C; select uaddo.1, -1, uaddo.0 // // The IR equivalent of this transform would have this form: // %a = add %x, C // %c = icmp ugt %x, ~C // %r = select %c, -1, %a // => // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C) // %u0 = extractvalue %u, 0 // %u1 = extractvalue %u, 1 // %r = select %u1, -1, %u0 SDVTList VTs = DAG.getVTList(VT, VT0); SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1)); return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0)); } } if (TLI.isOperationLegal(ISD::SELECT_CC, VT) || (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) { // Any flags available in a select/setcc fold will be on the setcc as they // migrated from fcmp Flags = N0->getFlags(); SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2, N0.getOperand(2)); SelectNode->setFlags(Flags); return SelectNode; } if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2)) return NewSel; } if (!VT.isVector()) if (SDValue BinOp = foldSelectOfBinops(N)) return BinOp; return SDValue(); } // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); EVT VT = N->getValueType(0); int NumElems = VT.getVectorNumElements(); assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode() == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD::BUILD_VECTOR); // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about // binary ones here. if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) return SDValue(); // We're sure we have an even number of elements due to the // concat_vectors we have as arguments to vselect. // Skip BV elements until we find one that's not an UNDEF // After we find an UNDEF element, keep looping until we get to half the // length of the BV and see if all the non-undef nodes are the same. ConstantSDNode *BottomHalf = nullptr; for (int i = 0; i < NumElems / 2; ++i) { if (Cond->getOperand(i)->isUndef()) continue; if (BottomHalf == nullptr) BottomHalf = cast(Cond.getOperand(i)); else if (Cond->getOperand(i).getNode() != BottomHalf) return SDValue(); } // Do the same for the second half of the BuildVector ConstantSDNode *TopHalf = nullptr; for (int i = NumElems / 2; i < NumElems; ++i) { if (Cond->getOperand(i)->isUndef()) continue; if (TopHalf == nullptr) TopHalf = cast(Cond.getOperand(i)); else if (Cond->getOperand(i).getNode() != TopHalf) return SDValue(); } assert(TopHalf && BottomHalf && "One half of the selector was all UNDEFs and the other was all the " "same value. This should have been addressed before this function."); return DAG.getNode( ISD::CONCAT_VECTORS, DL, VT, BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0), TopHalf->isZero() ? 
RHS->getOperand(1) : LHS->getOperand(1)); } bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG) { if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD) return false; // Only perform the transformation when existing operands can be reused. if (IndexIsScaled) return false; // For now we check only the LHS of the add. SDValue LHS = Index.getOperand(0); SDValue SplatVal = DAG.getSplatValue(LHS); if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType()) return false; BasePtr = SplatVal; Index = Index.getOperand(1); return true; } // Fold sext/zext of index into index type. bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // It's always safe to look through zero extends. if (Index.getOpcode() == ISD::ZERO_EXTEND) { SDValue Op = Index.getOperand(0); if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { IndexType = ISD::UNSIGNED_SCALED; Index = Op; return true; } if (ISD::isIndexTypeSigned(IndexType)) { IndexType = ISD::UNSIGNED_SCALED; return true; } } // It's only safe to look through sign extends when Index is signed. if (Index.getOpcode() == ISD::SIGN_EXTEND && ISD::isIndexTypeSigned(IndexType)) { SDValue Op = Index.getOperand(0); if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { Index = Op; return true; } } return false; } SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MaskedScatterSDNode *MSC = cast(N); SDValue Mask = MSC->getMask(); SDValue Chain = MSC->getChain(); SDValue Index = MSC->getIndex(); SDValue Scale = MSC->getScale(); SDValue StoreVal = MSC->getValue(); SDValue BasePtr = MSC->getBasePtr(); ISD::MemIndexType IndexType = MSC->getIndexType(); SDLoc DL(N); // Zap scatters with a zero mask. if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return Chain; if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, MSC->getMemOperand(), IndexType, MSC->isTruncatingStore()); } if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, MSC->getMemOperand(), IndexType, MSC->isTruncatingStore()); } return SDValue(); } SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = cast(N); SDValue Mask = MST->getMask(); SDValue Chain = MST->getChain(); SDValue Value = MST->getValue(); SDValue Ptr = MST->getBasePtr(); SDLoc DL(N); // Zap masked stores with a zero mask. if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return Chain; // If this is a masked load with an all ones mask, we can use a unmasked load. // FIXME: Can we do this for indexed, compressing, or truncating stores? if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() && !MST->isCompressingStore() && !MST->isTruncatingStore()) return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), MST->getBasePtr(), MST->getPointerInfo(), MST->getOriginalAlign(), MachineMemOperand::MOStore, MST->getAAInfo()); // Try transforming N to an indexed store. 
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); if (MST->isTruncatingStore() && MST->isUnindexed() && Value.getValueType().isInteger() && (!isa(Value) || !cast(Value)->isOpaque())) { APInt TruncDemandedBits = APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), MST->getMemoryVT().getScalarSizeInBits()); // See if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, TruncDemandedBits)) { // Re-visit the store if anything changed and the store hasn't been merged // with another node (N is deleted) SimplifyDemandedBits will add Value's // node back to the worklist if necessary, but we also need to re-visit // the Store node itself. if (N->getOpcode() != ISD::DELETED_NODE) AddToWorklist(N); return SDValue(N, 0); } } // If this is a TRUNC followed by a masked store, fold this into a masked // truncating store. We can do this even if this is already a masked // truncstore. if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() && MST->isUnindexed() && TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), MST->getMemoryVT(), LegalOperations)) { auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(), Value.getOperand(0).getValueType()); return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, MST->getOffset(), Mask, MST->getMemoryVT(), MST->getMemOperand(), MST->getAddressingMode(), /*IsTruncating=*/true); } return SDValue(); } SDValue DAGCombiner::visitMGATHER(SDNode *N) { MaskedGatherSDNode *MGT = cast(N); SDValue Mask = MGT->getMask(); SDValue Chain = MGT->getChain(); SDValue Index = MGT->getIndex(); SDValue Scale = MGT->getScale(); SDValue PassThru = MGT->getPassThru(); SDValue BasePtr = MGT->getBasePtr(); ISD::MemIndexType IndexType = MGT->getIndexType(); SDLoc DL(N); // Zap gathers with a zero mask. if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return CombineTo(N, PassThru, MGT->getChain()); if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather( DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType()); } if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather( DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType()); } return SDValue(); } SDValue DAGCombiner::visitMLOAD(SDNode *N) { MaskedLoadSDNode *MLD = cast(N); SDValue Mask = MLD->getMask(); SDLoc DL(N); // Zap masked loads with a zero mask. if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); // If this is a masked load with an all ones mask, we can use a unmasked load. // FIXME: Can we do this for indexed, expanding, or extending loads? if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() && !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) { SDValue NewLd = DAG.getLoad( N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(), MLD->getPointerInfo(), MLD->getOriginalAlign(), MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges()); return CombineTo(N, NewLd, NewLd.getValue(1)); } // Try transforming N to an indexed load. 
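// On targets with pre-/post-incremented addressing modes, a pointer add
// feeding (or consuming) this load's address may be folded into the memory
// operation itself, removing the separate address arithmetic.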
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); return SDValue(); } /// A vector select of 2 constant vectors can be simplified to math/logic to /// avoid a variable select instruction and possibly avoid constant loads. SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 || !TLI.convertSelectOfConstantsToMath(VT) || !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) return SDValue(); // Check if we can use the condition value to increment/decrement a single // constant value. This simplifies a select to an add and removes a constant // load/materialization from the general case. bool AllAddOne = true; bool AllSubOne = true; unsigned Elts = VT.getVectorNumElements(); for (unsigned i = 0; i != Elts; ++i) { SDValue N1Elt = N1.getOperand(i); SDValue N2Elt = N2.getOperand(i); if (N1Elt.isUndef() || N2Elt.isUndef()) continue; if (N1Elt.getValueType() != N2Elt.getValueType()) continue; const APInt &C1 = cast(N1Elt)->getAPIntValue(); const APInt &C2 = cast(N2Elt)->getAPIntValue(); if (C1 != C2 + 1) AllAddOne = false; if (C1 != C2 - 1) AllSubOne = false; } // Further simplifications for the extra-special cases where the constants are // all 0 or all -1 should be implemented as folds of these patterns. SDLoc DL(N); if (AllAddOne || AllSubOne) { // vselect Cond, C+1, C --> add (zext Cond), C // vselect Cond, C-1, C --> add (sext Cond), C auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); } // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) APInt Pow2C; if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && isNullOrNullSplat(N2)) { SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); } if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) return V; // The general case for select-of-constants: // vselect Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so // leave that to a machine-specific pass. return SDValue(); } SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); SDLoc DL(N); if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; if (SDValue V = foldBoolSelectToLogic(N, DAG)) return V; // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) return DAG.getSelect(DL, VT, F, N2, N1); // Canonicalize integer abs. 
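// Worked example of the expansion below for X == -5 (i32):
//   Y = sra(X, 31) == -1;  add(X, Y) == -6;  xor(-6, -1) == 5 == |X|.
// For non-negative X, Y == 0 and the add/xor leave X unchanged.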
// vselect (setg[te] X, 0), X, -X -> // vselect (setgt X, -1), X, -X -> // vselect (setl[te] X, 0), -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) if (N0.getOpcode() == ISD::SETCC) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); ISD::CondCode CC = cast(N0.getOperand(2))->get(); bool isAbs = false; bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); if (isAbs) { if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) return DAG.getNode(ISD::ABS, DL, VT, LHS); SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, getShiftAmountTy(VT))); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } // vselect x, y (fcmp lt x, y) -> fminnum x, y // vselect x, y (fcmp gt x, y) -> fmaxnum x, y // // This is OK if we don't care about what happens if either operand is a // NaN. // if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) return FMinMax; } if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG)) return S; if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG)) return S; // If this select has a condition (setcc) with narrower operands than the // select, try to widen the compare to match the select width. // TODO: This should be extended to handle any constant. // TODO: This could be extended to handle non-loading patterns, but that // requires thorough testing to avoid regressions. if (isNullOrNullSplat(RHS)) { EVT NarrowVT = LHS.getValueType(); EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger(); EVT SetCCVT = getSetCCResultType(LHS.getValueType()); unsigned SetCCWidth = SetCCVT.getScalarSizeInBits(); unsigned WideWidth = WideVT.getScalarSizeInBits(); bool IsSigned = isSignedIntSetCC(CC); auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD; if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() && SetCCWidth != 1 && SetCCWidth < WideWidth && TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) && TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) { // Both compare operands can be widened for free. The LHS can use an // extended load, and the RHS is a constant: // vselect (ext (setcc load(X), C)), N1, N2 --> // vselect (setcc extload(X), C'), N1, N2 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS); SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS); EVT WideSetCCVT = getSetCCResultType(WideVT); SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC); return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2); } } // Match VSELECTs into add with unsigned saturation. if (hasOperation(ISD::UADDSAT, VT)) { // Check if one of the arms of the VSELECT is vector with all bits set. // If it's on the left side invert the predicate to simplify logic below. 
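// Illustrative scalar instance of the saturating-add match (i8, C == 100):
//   (x u> 155) ? 255 : (x + 100)  ==  uaddsat(x, 100)
// because ~100 == 155, and x u> 155 is exactly the case where x + 100
// wraps and must clamp to 255.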
SDValue Other; ISD::CondCode SatCC = CC; if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) { Other = N2; SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType()); } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) { Other = N1; } if (Other && Other.getOpcode() == ISD::ADD) { SDValue CondLHS = LHS, CondRHS = RHS; SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); // Canonicalize condition operands. if (SatCC == ISD::SETUGE) { std::swap(CondLHS, CondRHS); SatCC = ISD::SETULE; } // We can test against either of the addition operands. // x <= x+y ? x+y : ~0 --> uaddsat x, y // x+y >= x ? x+y : ~0 --> uaddsat x, y if (SatCC == ISD::SETULE && Other == CondRHS && (OpLHS == CondLHS || OpRHS == CondLHS)) return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS); if (OpRHS.getOpcode() == CondRHS.getOpcode() && (OpRHS.getOpcode() == ISD::BUILD_VECTOR || OpRHS.getOpcode() == ISD::SPLAT_VECTOR) && CondLHS == OpLHS) { // If the RHS is a constant we have to reverse the const // canonicalization. // x >= ~C ? x+C : ~0 --> uaddsat x, C auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { return Cond->getAPIntValue() == ~Op->getAPIntValue(); }; if (SatCC == ISD::SETULE && ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT)) return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS); } } } // Match VSELECTs into sub with unsigned saturation. if (hasOperation(ISD::USUBSAT, VT)) { // Check if one of the arms of the VSELECT is a zero vector. If it's on // the left side invert the predicate to simplify logic below. SDValue Other; ISD::CondCode SatCC = CC; if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) { Other = N2; SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType()); } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) { Other = N1; } // zext(x) >= y ? trunc(zext(x) - y) : 0 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit))) // zext(x) > y ? trunc(zext(x) - y) : 0 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit))) if (Other && Other.getOpcode() == ISD::TRUNCATE && Other.getOperand(0).getOpcode() == ISD::SUB && (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) { SDValue OpLHS = Other.getOperand(0).getOperand(0); SDValue OpRHS = Other.getOperand(0).getOperand(1); if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND) if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG, DL)) return R; } if (Other && Other.getNumOperands() == 2) { SDValue CondRHS = RHS; SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); if (OpLHS == LHS) { // Look for a general sub with unsigned saturation first. // x >= y ? x-y : 0 --> usubsat x, y // x > y ? x-y : 0 --> usubsat x, y if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) && Other.getOpcode() == ISD::SUB && OpRHS == CondRHS) return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); if (OpRHS.getOpcode() == ISD::BUILD_VECTOR || OpRHS.getOpcode() == ISD::SPLAT_VECTOR) { if (CondRHS.getOpcode() == ISD::BUILD_VECTOR || CondRHS.getOpcode() == ISD::SPLAT_VECTOR) { // If the RHS is a constant we have to reverse the const // canonicalization. // x > C-1 ? 
x+-C : 0 --> usubsat x, C auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { return (!Op && !Cond) || (Op && Cond && Cond->getAPIntValue() == (-Op->getAPIntValue() - 1)); }; if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD && ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT, /*AllowUndefs*/ true)) { OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), OpRHS); return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); } // Another special case: If C was a sign bit, the sub has been // canonicalized into a xor. // FIXME: Would it be better to use computeKnownBits to // determine whether it's safe to decanonicalize the xor? // x s< 0 ? x^C : 0 --> usubsat x, C APInt SplatValue; if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR && ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) && ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) && SplatValue.isSignMask()) { // Note that we have to rebuild the RHS constant here to // ensure we don't rely on particular values of undef lanes. OpRHS = DAG.getConstant(SplatValue, DL, VT); return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); } } } } } } } if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. // Fold (vselect all_ones, N1, N2) -> N1 if (ISD::isConstantSplatVectorAllOnes(N0.getNode())) return N1; // Fold (vselect all_zeros, N1, N2) -> N2 if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) return N2; // The ConvertSelectToConcatVector function is assuming both the above // checks for (vselect (build_vector all{ones,zeros) ...) have been made // and addressed. if (N1.getOpcode() == ISD::CONCAT_VECTORS && N2.getOpcode() == ISD::CONCAT_VECTORS && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) return CV; } if (SDValue V = foldVSelectOfConstants(N)) return V; if (hasOperation(ISD::SRA, VT)) if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG)) return V; if (SimplifyDemandedVectorElts(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); SDValue N3 = N->getOperand(3); SDValue N4 = N->getOperand(4); ISD::CondCode CC = cast(N4)->get(); // fold select_cc lhs, rhs, x, x, cc -> x if (N2 == N3) return N2; // Determine if the condition we're dealing with is constant if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false)) { AddToWorklist(SCC.getNode()); // cond always true -> true val // cond always false -> false val if (auto *SCCC = dyn_cast(SCC.getNode())) return SCCC->isZero() ? N3 : N2; // When the condition is UNDEF, just return the first operand. This is // coherent the DAG creation, no setcc node is created in this case if (SCC->isUndef()) return N2; // Fold to a simpler select_cc if (SCC.getOpcode() == ISD::SETCC) { SDValue SelectOp = DAG.getNode( ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0), SCC.getOperand(1), N2, N3, SCC.getOperand(2)); SelectOp->setFlags(SCC->getFlags()); return SelectOp; } } // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N2, N3)) return SDValue(N, 0); // Don't revisit N. // fold select_cc into other things, such as min/max/abs return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); } SDValue DAGCombiner::visitSETCC(SDNode *N) { // setcc is very commonly used as an argument to brcond. 
This pattern // also lend itself to numerous combines and, as a result, it is desired // we keep the argument to a brcond as a setcc as much as possible. bool PreferSetCC = N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND; ISD::CondCode Cond = cast(N->getOperand(2))->get(); EVT VT = N->getValueType(0); // SETCC(FREEZE(X), CONST, Cond) // => // FREEZE(SETCC(X, CONST, Cond)) // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond) // isn't equivalent to true or false. // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to // FREEZE(SETCC(X, -128, SETULT)) because X can be poison. // // This transformation is beneficial because visitBRCOND can fold // BRCOND(FREEZE(X)) to BRCOND(X). // Conservatively optimize integer comparisons only. if (PreferSetCC) { // Do this only when SETCC is going to be used by BRCOND. SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); bool Updated = false; // Is 'X Cond C' always true or false? auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) { bool False = (Cond == ISD::SETULT && C->isZero()) || (Cond == ISD::SETLT && C->isMinSignedValue()) || (Cond == ISD::SETUGT && C->isAllOnes()) || (Cond == ISD::SETGT && C->isMaxSignedValue()); bool True = (Cond == ISD::SETULE && C->isAllOnes()) || (Cond == ISD::SETLE && C->isMaxSignedValue()) || (Cond == ISD::SETUGE && C->isZero()) || (Cond == ISD::SETGE && C->isMinSignedValue()); return True || False; }; if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) { if (!IsAlwaysTrueOrFalse(Cond, N1C)) { N0 = N0->getOperand(0); Updated = true; } } if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) { if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), N0C)) { N1 = N1->getOperand(0); Updated = true; } } if (Updated) return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond)); } SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond, SDLoc(N), !PreferSetCC); if (!Combined) return SDValue(); // If we prefer to have a setcc, and we don't, we'll try our best to // recreate one using rebuildSetCC. if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { SDValue NewSetCC = rebuildSetCC(Combined); // We don't have anything interesting to combine to. if (NewSetCC.getNode() == N) return SDValue(); if (NewSetCC) return NewSetCC; } return Combined; } SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Carry = N->getOperand(2); SDValue Cond = N->getOperand(3); // If Carry is false, fold to a regular SETCC. if (isNullConstant(Carry)) return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); return SDValue(); } /// Check if N satisfies: /// N is used once. /// N is a Load. /// The load is compatible with ExtOpcode. It means /// If load has explicit zero/sign extension, ExpOpcode must have the same /// extension. /// Otherwise returns true. static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) { if (!N.hasOneUse()) return false; if (!isa(N)) return false; LoadSDNode *Load = cast(N); ISD::LoadExtType LoadExt = Load->getExtensionType(); if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD) return true; // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same // extension. 
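Why the check below insists on matching extension kinds: a minimal standalone illustration (not part of this file) showing that sign- and zero-extending the same narrow loaded value diverge as soon as the sign bit is set, so an existing sextload cannot be reused by a zext path, and vice versa.

#include <cstdint>
// Scalar model of the compatibility rule: widen the i8 value -128 both ways.
constexpr int8_t   Loaded = -128;                      // narrow value, sign bit set
constexpr uint32_t SExt   = uint32_t(int32_t(Loaded)); // sign-extend: 0xFFFFFF80
constexpr uint32_t ZExt   = uint32_t(uint8_t(Loaded)); // zero-extend: 0x00000080
static_assert(SExt != ZExt, "sext and zext disagree once the sign bit is set");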
if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) || (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND)) return false; return true; } /// Fold /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y) /// (aext (select c, load x, load y)) -> (select c, extload x, extload y) /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); SDLoc DL(N); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) || !N0.hasOneUse()) return SDValue(); SDValue Op1 = N0->getOperand(1); SDValue Op2 = N0->getOperand(2); if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode)) return SDValue(); auto ExtLoadOpcode = ISD::EXTLOAD; if (Opcode == ISD::SIGN_EXTEND) ExtLoadOpcode = ISD::SEXTLOAD; else if (Opcode == ISD::ZERO_EXTEND) ExtLoadOpcode = ISD::ZEXTLOAD; LoadSDNode *Load1 = cast(Op1); LoadSDNode *Load2 = cast(Op2); if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) || !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT())) return SDValue(); SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1); SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2); return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2); } /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). /// Vector extends are not folded if operations are legal; this is to /// avoid introducing illegal build_vector dag nodes. static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); SDLoc DL(N); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 // fold (aext c1) -> c1 if (isa(N0)) return DAG.getNode(Opcode, DL, VT, N0); // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2) // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2) // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2) if (N0->getOpcode() == ISD::SELECT) { SDValue Op1 = N0->getOperand(1); SDValue Op2 = N0->getOperand(2); if (isa(Op1) && isa(Op2) && (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) { // For any_extend, choose sign extension of the constants to allow a // possible further transform to sign_extend_inreg.i.e. 
// // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0> // t2: i64 = any_extend t1 // --> // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0> // --> // t4: i64 = sign_extend_inreg t3 unsigned FoldOpc = Opcode; if (FoldOpc == ISD::ANY_EXTEND) FoldOpc = ISD::SIGN_EXTEND; return DAG.getSelect(DL, VT, N0->getOperand(0), DAG.getNode(FoldOpc, DL, VT, Op1), DAG.getNode(FoldOpc, DL, VT, Op2)); } } // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) EVT SVT = VT.getScalarType(); if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) && ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) return SDValue(); // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); SmallVector Elts; unsigned NumElts = VT.getVectorNumElements(); // For zero-extensions, UNDEF elements still guarantee to have the upper // bits set to zero. bool IsZext = Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG; for (unsigned i = 0; i != NumElts; ++i) { SDValue Op = N0.getOperand(i); if (Op.isUndef()) { Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT)); continue; } SDLoc DL(Op); // Get the constant value and if needed trunc it to the size of the type. // Nodes like build_vector might have constants wider than the scalar type. APInt C = cast(Op)->getAPIntValue().zextOrTrunc(EVTBits); if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); else Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); } return DAG.getBuildVector(VT, DL, Elts); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. Returns true if extension are possible and the above // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType()); for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User == N) continue; if (UI.getUse().getResNo() != N0.getResNo()) continue; // FIXME: Only extend SETCC N, N and SETCC N, c for now. if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast(User->getOperand(2))->get(); if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) // Sign bits will be lost after a zext. return false; bool Add = false; for (unsigned i = 0; i != 2; ++i) { SDValue UseOp = User->getOperand(i); if (UseOp == N0) continue; if (!isa(UseOp)) return false; Add = true; } if (Add) ExtendNodes.push_back(User); continue; } // If truncates aren't free and there are users we can't // extend, it isn't worthwhile. if (!isTruncFree) return false; // Remember if this value is live-out. 
if (User->getOpcode() == ISD::CopyToReg) HasCopyToRegUses = true; } if (HasCopyToRegUses) { bool BothLiveOut = false; for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { SDUse &Use = UI.getUse(); if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { BothLiveOut = true; break; } } if (BothLiveOut) // Both unextended and extended values are live out. There had better be // a good reason for the transformation. return ExtendNodes.size(); } return true; } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl &SetCCs, SDValue OrigLoad, SDValue ExtLoad, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. SDLoc DL(ExtLoad); for (SDNode *SetCC : SetCCs) { SmallVector Ops; for (unsigned j = 0; j != 2; ++j) { SDValue SOp = SetCC->getOperand(j); if (SOp == OrigLoad) Ops.push_back(ExtLoad); else Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); } Ops.push_back(SetCC->getOperand(2)); CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); } } // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue N0 = N->getOperand(0); EVT DstVT = N->getValueType(0); EVT SrcVT = N0.getValueType(); assert((N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) && "Unexpected node type (not an extend)!"); // fold (sext (load x)) to multiple smaller sextloads; same for zext. // For example, on a target with legal v4i32, but illegal v8i32, turn: // (v8i32 (sext (v8i16 (load x)))) // into: // (v8i32 (concat_vectors (v4i32 (sextload x)), // (v4i32 (sextload (x + 16))))) // Where uses of the original load, i.e.: // (v8i16 (load x)) // are replaced with: // (v8i16 (truncate // (v8i32 (concat_vectors (v4i32 (sextload x)), // (v4i32 (sextload (x + 16))))))) // // This combine is only applicable to illegal, but splittable, vectors. // All legal types, and illegal non-vector types, are handled elsewhere. // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. // if (N0->getOpcode() != ISD::LOAD) return SDValue(); LoadSDNode *LN0 = cast(N0); if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || !N0.hasOneUse() || !LN0->isSimple() || !DstVT.isVector() || !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SmallVector SetCCs; if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI)) return SDValue(); ISD::LoadExtType ExtType = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; // Try to split the vector types to get down to legal types. 
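A hypothetical standalone model of the search performed by the splitting loop that follows; splitUntilLegal and isLegalWidth are made-up names standing in for the repeated GetSplitDestVTs halving and the isLoadExtLegalOrCustom query.

// Illustrative sketch only: halve the element count until the narrower type
// counts as legal, or give up when a single element is reached.
static bool isLegalWidth(unsigned NumElts) {
  return NumElts <= 4; // pretend anything with <= 4 elements is legal
}
static unsigned splitUntilLegal(unsigned NumElts) {
  while (!isLegalWidth(NumElts) && NumElts > 1)
    NumElts /= 2; // mirrors DAG.GetSplitDestVTs() taking the first half
  return NumElts; // e.g. splitUntilLegal(16) == 4
}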
EVT SplitSrcVT = SrcVT; EVT SplitDstVT = DstVT; while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && SplitSrcVT.getVectorNumElements() > 1) { SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; } if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) return SDValue(); assert(!DstVT.isScalableVector() && "Unexpected scalable vector type"); SDLoc DL(N); const unsigned NumSplits = DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); const unsigned Stride = SplitSrcVT.getStoreSize(); SmallVector Loads; SmallVector Chains; SDValue BasePtr = LN0->getBasePtr(); for (unsigned Idx = 0; Idx < NumSplits; Idx++) { const unsigned Offset = Idx * Stride; const Align Align = commonAlignment(LN0->getAlign(), Offset); SDValue SplitLoad = DAG.getExtLoad( ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); } SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); // Simplify TF. AddToWorklist(NewChain.getNode()); CombineTo(N, NewValue); // Replace uses of the original load (before extension) // with a truncate of the concatenated sextloaded vectors. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode()); CombineTo(N0.getNode(), Trunc, NewChain); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { assert(N->getOpcode() == ISD::ZERO_EXTEND); EVT VT = N->getValueType(0); EVT OrigVT = N->getOperand(0).getValueType(); if (TLI.isZExtFree(OrigVT, VT)) return SDValue(); // and/or/xor SDValue N0 = N->getOperand(0); if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) || N0.getOperand(1).getOpcode() != ISD::Constant || (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT))) return SDValue(); // shl/shr SDValue N1 = N0->getOperand(0); if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) || N1.getOperand(1).getOpcode() != ISD::Constant || (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT))) return SDValue(); // load if (!isa(N1.getOperand(0))) return SDValue(); LoadSDNode *Load = cast(N1.getOperand(0)); EVT MemVT = Load->getMemoryVT(); if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) || Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed()) return SDValue(); // If the shift op is SHL, the logic op must be AND, otherwise the result // will be wrong. if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND) return SDValue(); if (!N0.hasOneUse() || !N1.hasOneUse()) return SDValue(); SmallVector SetCCs; if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0), ISD::ZERO_EXTEND, SetCCs, TLI)) return SDValue(); // Actually do the transformation. 
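Before the rewrite is emitted below, a scalar sanity check (illustrative only) of why CombineZExtLogicopShiftLoad demands an AND when the shift is a SHL: zero-extension commutes with a logical right shift of the narrow value, but a narrow left shift discards high bits that the widened shift would keep, and only an AND with a narrow-width mask restores the equivalence.

#include <cstdint>
constexpr uint8_t  X = 0xF1;
constexpr uint32_t M = 0xE0; // mask that fits in the narrow (i8) type
// srl commutes with zext ...
static_assert(uint32_t(uint8_t(X >> 3)) == (uint32_t(X) >> 3),
              "zext(srl x, c) == srl(zext x, c)");
// ... but shl does not, unless the result is masked back into the narrow width.
static_assert(uint32_t(uint8_t(X << 3)) != (uint32_t(X) << 3),
              "a narrow shl drops high bits the wide shl keeps");
static_assert(uint32_t(uint8_t((X << 3) & M)) == ((uint32_t(X) << 3) & M),
              "an AND with a narrow-width mask restores the equivalence");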
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT, Load->getChain(), Load->getBasePtr(), Load->getMemoryVT(), Load->getMemOperand()); SDLoc DL1(N1); SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, N1.getOperand(1)); APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL0(N0); SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, DAG.getConstant(Mask, DL0, VT)); ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); CombineTo(N, And); if (SDValue(Load, 0).hasOneUse()) { DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1)); } else { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load), Load->getValueType(0), ExtLoad); CombineTo(Load, Trunc, ExtLoad.getValue(1)); } // N0 is dead at this point. recursivelyDeleteUnusedNodes(N0.getNode()); return SDValue(N,0); // Return N so it doesn't get rechecked! } /// If we're narrowing or widening the result of a vector select and the final /// size is the same size as a setcc (compare) feeding the select, then try to /// apply the cast operation to the select's operands because matching vector /// sizes for a select condition and other operands should be more efficient. SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { unsigned CastOpcode = Cast->getOpcode(); assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || CastOpcode == ISD::FP_ROUND) && "Unexpected opcode for vector select narrowing/widening"); // We only do this transform before legal ops because the pattern may be // obfuscated by target-specific operations after legalization. Do not create // an illegal select op, however, because that may be difficult to lower. EVT VT = Cast->getValueType(0); if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) return SDValue(); SDValue VSel = Cast->getOperand(0); if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || VSel.getOperand(0).getOpcode() != ISD::SETCC) return SDValue(); // Does the setcc have the same vector size as the casted select? SDValue SetCC = VSel.getOperand(0); EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) return SDValue(); // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) SDValue A = VSel.getOperand(1); SDValue B = VSel.getOperand(2); SDValue CastA, CastB; SDLoc DL(Cast); if (CastOpcode == ISD::FP_ROUND) { // FP_ROUND (fptrunc) has an extra flag operand to pass along. CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); } else { CastA = DAG.getNode(CastOpcode, DL, VT, A); CastB = DAG.getNode(CastOpcode, DL, VT, B); } return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); } // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType) { SDNode *N0Node = N0.getNode(); bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? 
ISD::isSEXTLoad(N0Node) : ISD::isZEXTLoad(N0Node); if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) return SDValue(); LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); if ((LegalOperations || !LN0->isSimple() || VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) return SDValue(); SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); Combiner.CombineTo(N, ExtLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); if (LN0->use_empty()) Combiner.recursivelyDeleteUnusedNodes(LN0); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) // Only generate vector extloads when 1) they're legal, and 2) they are // deemed desirable by the target. static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) { // TODO: isFixedLengthVector() should be removed and any negative effects on // code generation being the result of that target's implementation of // isVectorLoadExtDesirable(). if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isFixedLengthVector() || !cast(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI); if (VT.isVector()) DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (!DoXform) return {}; LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc); // If the load value is used only by N, replace it via CombineTo N. bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); Combiner.CombineTo(N, ExtLoad); if (NoReplaceTrunc) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); Combiner.recursivelyDeleteUnusedNodes(LN0); } else { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1)); } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) { if (!N0.hasOneUse()) return SDValue(); MaskedLoadSDNode *Ld = dyn_cast(N0); if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) return SDValue(); if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0))) return SDValue(); if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SDLoc dl(Ld); SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); SDValue NewLoad = DAG.getMaskedLoad( VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(), PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(), ExtLoadType, Ld->isExpandingLoad()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); return NewLoad; } static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { assert((N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext"); SDValue SetCC = N->getOperand(0); if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) return SDValue(); SDValue X = SetCC.getOperand(0); SDValue Ones = SetCC.getOperand(1); ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); EVT VT = N->getValueType(0); EVT XVT = X.getValueType(); // setge X, C is canonicalized to setgt, so we do not need to match that // pattern. The setlt sibling is folded in SimplifySelectCC() because it does // not require the 'not' op. if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { // Invert and smear/shift the sign bit: // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) SDLoc DL(N); unsigned ShCt = VT.getSizeInBits() - 1; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { SDValue NotX = DAG.getNOT(DL, X, VT); SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT); auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); } } return SDValue(); } SDValue DAGCombiner::foldSextSetcc(SDNode *N) { SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::SETCC) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); ISD::CondCode CC = cast(N0.getOperand(2))->get(); EVT VT = N->getValueType(0); EVT N00VT = N00.getValueType(); SDLoc DL(N); // Propagate fast-math-flags. SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); // On some architectures (such as SSE/NEON/etc) the SETCC result type is // the same size as the compared operands. Try to optimize sext(setcc()) // if this is the case. if (VT.isVector() && !LegalOperations && TLI.getBooleanContents(N00VT) == TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT SVT = getSetCCResultType(N00VT); // If we already have the desired type, don't change it. if (SVT != N0.getValueType()) { // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
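An illustrative scalar aside (not the combiner code itself): with ZeroOrNegativeOneBooleanContent, each lane of a vector setcc is already all-zeros or all-ones, so sign-extending the narrow mask lane is the same as rebuilding the comparison directly at the wider lane width, which is what the size check below exploits.

#include <cstdint>
// One lane of sext(setcc(a, b, setlt)), modelled on scalars.
constexpr int32_t sextOfNarrowMask(int8_t A, int8_t B) {
  return int32_t(int8_t(A < B ? -1 : 0)); // i8 mask lane, then sext to i32
}
constexpr int32_t rebuiltWideMask(int8_t A, int8_t B) {
  return A < B ? -1 : 0;                  // comparison redone at i32 width
}
static_assert(sextOfNarrowMask(-3, 7) == rebuiltWideMask(-3, 7), "both lanes are -1");
static_assert(sextOfNarrowMask(9, 2) == rebuiltWideMask(9, 2), "both lanes are 0");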
if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(DL, VT, N00, N01, CC); // If the desired elements are smaller or larger than the source // elements, we can use a matching integer vector type and then // truncate/sign extend. EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); if (SVT == MatchingVecType) { SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); return DAG.getSExtOrTrunc(VsetCC, DL, VT); } } // Try to eliminate the sext of a setcc by zexting the compare operands. if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) && !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) { bool IsSignedCmp = ISD::isSignedIntSetCC(CC); unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD; unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; // We have an unsupported narrow vector compare op that would be legal // if extended to the destination type. See if the compare operands // can be freely extended to the destination type. auto IsFreeToExtend = [&](SDValue V) { if (isConstantOrConstantVector(V, /*NoOpaques*/ true)) return true; // Match a simple, non-extended load that can be converted to a // legal {z/s}ext-load. // TODO: Allow widening of an existing {z/s}ext-load? if (!(ISD::isNON_EXTLoad(V.getNode()) && ISD::isUNINDEXEDLoad(V.getNode()) && cast(V)->isSimple() && TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType()))) return false; // Non-chain users of this value must either be the setcc in this // sequence or extends that can be folded into the new {z/s}ext-load. for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { // Skip uses of the chain and the setcc. SDNode *User = *UI; if (UI.getUse().getResNo() != 0 || User == N0.getNode()) continue; // Extra users must have exactly the same cast we are about to create. // TODO: This restriction could be eased if ExtendUsesToFormExtLoad() // is enhanced similarly. if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT) return false; } return true; }; if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) { SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00); SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01); return DAG.getSetCC(DL, VT, Ext0, Ext1, CC); } } } // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) // Here, T can be 1 or -1, depending on the type of the setcc and // getBooleanContents(). unsigned SetCCWidth = N0.getScalarValueSizeInBits(); // To determine the "true" side of the select, we need to know the high bit // of the value returned by the setcc if it evaluates to true. // If the type of the setcc is i1, then the true case of the select is just // sext(i1 1), that is, -1. // If the type of the setcc is larger (say, i8) then the value of the high // bit depends on getBooleanContents(), so ask TLI for a real "true" value // of the appropriate width. SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT) : DAG.getBoolConstant(true, DL, VT, N00VT); SDValue Zero = DAG.getConstant(0, DL, VT); if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { EVT SetCCVT = getSetCCResultType(N00VT); // Don't do this transform for i1 because there's a select transform // that would reverse it. // TODO: We should not do this transform at all without a target hook // because a sext is likely cheaper than a select? 
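Looping back to the operand-widening path earlier in foldSextSetcc (the IsFreeToExtend check): a standalone spot-check, illustrative only, that unsigned comparisons survive zero-extending both operands and signed comparisons survive sign-extending both, which is what allows the narrow setcc to be rebuilt at the wider type.

#include <cstdint>
constexpr bool unsignedCmpSurvivesZext(uint8_t A, uint8_t B) {
  return (A < B) == (uint32_t(A) < uint32_t(B)); // zext both operands
}
constexpr bool signedCmpSurvivesSext(int8_t A, int8_t B) {
  return (A < B) == (int32_t(A) < int32_t(B));   // sext both operands
}
static_assert(unsignedCmpSurvivesZext(200, 17), "ult is preserved by zext");
static_assert(signedCmpSurvivesSext(-100, 5), "slt is preserved by sext");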
if (SetCCVT.getScalarSizeInBits() != 1 && (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) { SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); } } return SDValue(); } SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); SDLoc DL(N); // sext(undef) = 0 because the top bit will all be the same. if (N0.isUndef()) return DAG.getConstant(0, DL, VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)); if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. SDValue Op = N0.getOperand(0); unsigned OpBits = Op.getScalarValueSizeInBits(); unsigned MidBits = N0.getScalarValueSizeInBits(); unsigned DestBits = VT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op); if (OpBits == DestBits) { // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign // bits, it is already ready. if (NumSignBits > DestBits-MidBits) return Op; } else if (OpBits < DestBits) { // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign // bits, just sext from i32. if (NumSignBits > OpBits-MidBits) return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op); } else { // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign // bits, just truncate to i32. if (NumSignBits > OpBits-MidBits) return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); } // fold (sext (truncate x)) -> (sextinreg x). if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, N0.getValueType())) { if (OpBits < DestBits) Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); else if (OpBits > DestBits) Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, DAG.getValueType(N0.getValueType())); } } // Try to simplify (sext (load x)). if (SDValue foldedExt = tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; if (SDValue foldedExt = tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) return ExtLoad; // Try to simplify (sext (sextload x)). 
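Returning to the trunc handling near the top of visitSIGN_EXTEND: a tiny standalone check (illustrative only) of the "already sign extended" case, where sext(trunc x) round-trips to x whenever x carries enough redundant sign bits, so the trunc/sext pair can simply be dropped.

#include <cstdint>
// i32 -> i8 -> i32 round-trip is a no-op exactly when the value fits in i8.
constexpr bool sextOfTruncIsNoOp(int32_t X) {
  return int32_t(int8_t(X)) == X;
}
static_assert(sextOfTruncIsNoOp(-75), "-75 fits in i8, so trunc+sext round-trips");
static_assert(!sextOfTruncIsNoOp(1000), "1000 does not fit in i8, so it does not");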
if (SDValue foldedExt = tryToFoldExtOfExtload( DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD)) return foldedExt; // fold (sext (and/or/xor (load x), cst)) -> // (and/or/xor (sextload x), (sext cst)) if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN00 = cast(N0.getOperand(0)); EVT MemVT = LN00->getMemoryVT(); if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) && LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) { SmallVector SetCCs; bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT, LN00->getChain(), LN00->getBasePtr(), LN00->getMemoryVT(), LN00->getMemOperand()); APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND); bool NoReplaceTruncAnd = !N0.hasOneUse(); bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); CombineTo(N, And); // If N0 has multiple uses, change other uses as well. if (NoReplaceTruncAnd) { SDValue TruncAnd = DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); CombineTo(N0.getNode(), TruncAnd); } if (NoReplaceTrunc) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); } else { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), LN00->getValueType(0), ExtLoad); CombineTo(LN00, Trunc, ExtLoad.getValue(1)); } return SDValue(N,0); // Return N so it doesn't get rechecked! } } } if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) return V; if (SDValue V = foldSextSetcc(N)) return V; // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; // Eliminate this sign extend by doing a negation in the destination type: // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64) if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isNullOrNullSplat(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND && TLI.isOperationLegalOrCustom(ISD::SUB, VT)) { SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext); } // Eliminate this sign extend by doing a decrement in the destination type: // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1) if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesOrAllOnesSplat(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && TLI.isOperationLegalOrCustom(ISD::ADD, VT)) { SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT); return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } // fold sext (not i1 X) -> add (zext i1 X), -1 // TODO: This could be extended to handle bool vectors. 
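The comment just above names the fold; a tiny standalone model (illustrative only): for a boolean b, sign-extending !b gives -1 when b is false and 0 when b is true, which is exactly zext(b) plus -1.

#include <cstdint>
// sext i1 (not b) == add (zext i1 b), -1, checked for both boolean values.
constexpr int32_t sextNot(bool B) { return !B ? -1 : 0; }         // sext(not b)
constexpr int32_t zextMinusOne(bool B) { return int32_t(B) - 1; } // zext(b) + (-1)
static_assert(sextNot(false) == zextMinusOne(false), "b = 0: both give -1");
static_assert(sextNot(true) == zextMinusOne(true), "b = 1: both give 0");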
if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() && (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) && TLI.isOperationLegal(ISD::ADD, VT)))) { // If we can eliminate the 'not', the sext form should be better if (SDValue NewXor = visitXOR(N0.getNode())) { // Returning N0 is a form of in-visit replacement that may have // invalidated N0. if (NewXor.getNode() == N0.getNode()) { // Return SDValue here as the xor should have already been replaced in // this sext. return SDValue(); } // Return a new sext with the new xor. return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor); } SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) return Res; return SDValue(); } // isTruncateOf - If N is a truncate of some other value, return true, record // the value being truncated in Op and which of Op's bits are zero/one in Known. // This function computes KnownBits to avoid a duplicated call to // computeKnownBits in the caller. static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known) { if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); Known = DAG.computeKnownBits(Op); return true; } if (N.getOpcode() != ISD::SETCC || N.getValueType().getScalarType() != MVT::i1 || cast(N.getOperand(2))->get() != ISD::SETNE) return false; SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType()); if (isNullOrNullSplat(Op0)) Op = Op1; else if (isNullOrNullSplat(Op1)) Op = Op0; else return false; Known = DAG.computeKnownBits(Op); return (Known.Zero | 1).isAllOnes(); } /// Given an extending node with a pop-count operand, if the target does not /// support a pop-count in the narrow source type but does support it in the /// destination type, widen the pop-count to the destination type. static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) { assert((Extend->getOpcode() == ISD::ZERO_EXTEND || Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op"); SDValue CtPop = Extend->getOperand(0); if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse()) return SDValue(); EVT VT = Extend->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) || !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT)) return SDValue(); // zext (ctpop X) --> ctpop (zext X) SDLoc DL(Extend); SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT); return DAG.getNode(ISD::CTPOP, DL, VT, NewZext); } SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // zext(undef) = 0 if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); // fold (zext (truncate x)) -> (zext x) or // (zext (truncate x)) -> (truncate x) // This is valid when the truncated bits of x are already zero. SDValue Op; KnownBits Known; if (isTruncateOf(DAG, N0, Op, Known)) { APInt TruncatedBits = (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ? 
APInt(Op.getScalarValueSizeInBits(), 0) : APInt::getBitsSet(Op.getScalarValueSizeInBits(), N0.getScalarValueSizeInBits(), std::min(Op.getScalarValueSizeInBits(), VT.getScalarSizeInBits())); if (TruncatedBits.isSubsetOf(Known.Zero)) return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); } // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } EVT SrcVT = N0.getOperand(0).getValueType(); EVT MinVT = N0.getValueType(); // Try to mask before the extension to avoid having to generate a larger mask, // possibly over several sub-vectors. if (SrcVT.bitsLT(VT) && VT.isVector()) { if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { SDValue Op = N0.getOperand(0); Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT); AddToWorklist(Op.getNode()); SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT); // Transfer the debug info; the new node is equivalent to N0. DAG.transferDbgValues(N0, ZExtOrTrunc); return ZExtOrTrunc; } } if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); AddToWorklist(Op.getNode()); SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT); // We may safely transfer the debug info describing the truncate node over // to the equivalent and operation. DAG.transferDbgValues(N0, And); return And; } } // Fold (zext (and (trunc x), cst)) -> (and x, cst), // if either of the casts is not free. if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && N0.getOperand(1).getOpcode() == ISD::Constant && (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType()) || !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT); APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); } // Try to simplify (zext (load x)). if (SDValue foldedExt = tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; if (SDValue foldedExt = tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; // fold (zext (load x)) to multiple smaller zextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) return ExtLoad; // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) // Unless (and (load x) cst) will match as a zextload already and has // additional users. 
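A scalar spot-check (illustrative only, not part of DAGCombiner) of the distribution the next block performs: zero-extension commutes with and/or/xor against a constant that fits in the narrow type, because neither side can produce set bits above the narrow width.

#include <cstdint>
constexpr uint8_t V = 0xA7; // stands in for the narrow loaded value
constexpr uint8_t C = 0x3C; // narrow constant operand
static_assert(uint32_t(uint8_t(V & C)) == (uint32_t(V) & uint32_t(C)), "zext(and) == and(zext)");
static_assert(uint32_t(uint8_t(V | C)) == (uint32_t(V) | uint32_t(C)), "zext(or)  == or(zext)");
static_assert(uint32_t(uint8_t(V ^ C)) == (uint32_t(V) ^ uint32_t(C)), "zext(xor) == xor(zext)");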
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN00 = cast(N0.getOperand(0)); EVT MemVT = LN00->getMemoryVT(); if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) && LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) { if (N0.getOpcode() == ISD::AND) { auto *AndC = cast(N0.getOperand(1)); EVT LoadResultTy = AndC->getValueType(0); EVT ExtVT; if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT)) DoXform = false; } } if (DoXform) DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT, LN00->getChain(), LN00->getBasePtr(), LN00->getMemoryVT(), LN00->getMemOperand()); APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL(N); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); bool NoReplaceTruncAnd = !N0.hasOneUse(); bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); CombineTo(N, And); // If N0 has multiple uses, change other uses as well. if (NoReplaceTruncAnd) { SDValue TruncAnd = DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); CombineTo(N0.getNode(), TruncAnd); } if (NoReplaceTrunc) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); } else { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), LN00->getValueType(0), ExtLoad); CombineTo(LN00, Trunc, ExtLoad.getValue(1)); } return SDValue(N,0); // Return N so it doesn't get rechecked! } } } // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N)) return ZExtLoad; // Try to simplify (zext (zextload x)). if (SDValue foldedExt = tryToFoldExtOfExtload( DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD)) return foldedExt; if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) return V; if (N0.getOpcode() == ISD::SETCC) { // Propagate fast-math-flags. SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && N0.getValueType().getVectorElementType() == MVT::i1) { EVT N00VT = N0.getOperand(0).getValueType(); if (getSetCCResultType(N00VT) == N0.getValueType()) return SDValue(); // We know that the # elements of the results is the same as the # // elements of the compare (and the # elements of the compare result for // that matter). Check to see that they are the same size. If so, we know // that the element size of the sext'd result matches the element size of // the compare operands. SDLoc DL(N); if (VT.getSizeInBits() == N00VT.getSizeInBits()) { // zext(setcc) -> zext_in_reg(vsetcc) for vectors. SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType()); } // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/any extend followed by zext_in_reg. 
EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL, N0.getValueType()); } // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc) SDLoc DL(N); EVT N0VT = N0.getValueType(); EVT N00VT = N0.getOperand(0).getValueType(); if (SDValue SCC = SimplifySelectCC( DL, N0.getOperand(0), N0.getOperand(1), DAG.getBoolConstant(true, DL, N0VT, N00VT), DAG.getBoolConstant(false, DL, N0VT, N00VT), cast(N0.getOperand(2))->get(), true)) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC); } // (zext (shl (zext x), cst)) -> (shl (zext x), cst) if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && isa(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) { SDValue ShAmt = N0.getOperand(1); if (N0.getOpcode() == ISD::SHL) { SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() - InnerZExt.getOperand(0).getValueSizeInBits(); if (cast(ShAmt)->getAPIntValue().ugt(KnownZeroBits)) return SDValue(); } SDLoc DL(N); // Ensure that the shift amount is wide enough for the shifted value. if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits()) ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); return DAG.getNode(N0.getOpcode(), DL, VT, DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), ShAmt); } if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) return Res; return SDValue(); } SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // aext(undef) = undef if (N0.isUndef()) return DAG.getUNDEF(VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (aext (truncate x)) if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); // Fold (aext (and (trunc x), cst)) -> (and x, cst) // if the trunc is not free. 
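The fold named just above exploits that an any_extend's high bits are unspecified, so only the low (narrow) bits of the result have to match; a scalar spot-check of that low-bits equality, with made-up example values, illustrative only.

#include <cstdint>
constexpr uint32_t X = 0xDEADBEEF; // wide value feeding the truncate
constexpr uint32_t C = 0x0000005A; // constant already narrow enough for i8
// Low 8 bits of (and (trunc x), cst) and of (and x, cst) agree; the high bits
// of the any_extend are don't-care, so the wide AND is an acceptable result.
static_assert(uint8_t(uint8_t(X) & C) == uint8_t(X & C), "low bits match");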
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && N0.getOperand(1).getOpcode() == ISD::Constant && !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType())) { SDLoc DL(N); SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT); SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1)); assert(isa(Y) && "Expected constant to be folded!"); return DAG.getNode(ISD::AND, DL, VT, X, Y); } // fold (aext (load x)) -> (aext (truncate (extload x))) // None of the supported targets knows how to perform load and any_ext // on vectors in one instruction, so attempt to fold to zext instead. if (VT.isVector()) { // Try to simplify (zext (load x)). if (SDValue foldedExt = tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; } else if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND); // If the load value is used only by N, replace it via CombineTo N. bool NoReplaceTrunc = N0.hasOneUse(); CombineTo(N, ExtLoad); if (NoReplaceTrunc) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); recursivelyDeleteUnusedNodes(LN0); } else { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(LN0, Trunc, ExtLoad.getValue(1)); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (aext (zextload x)) -> (aext (truncate (zextload x))) // fold (aext (sextload x)) -> (aext (truncate (sextload x))) // fold (aext ( extload x)) -> (aext (truncate (extload x))) if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); recursivelyDeleteUnusedNodes(LN0); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } if (N0.getOpcode() == ISD::SETCC) { // Propagate fast-math-flags. SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); // For vectors: // aext(setcc) -> vsetcc // aext(setcc) -> truncate(vsetcc) // aext(setcc) -> aext(vsetcc) // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N00VT = N0.getOperand(0).getValueType(); if (getSetCCResultType(N00VT) == N0.getValueType()) return SDValue(); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
if (VT.getSizeInBits() == N00VT.getSizeInBits()) return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/any extend EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); if (SDValue SCC = SimplifySelectCC( DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true)) return SCC; } if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) return Res; return SDValue(); } SDValue DAGCombiner::visitAssertExt(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT AssertVT = cast(N1)->getVT(); // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt) if (N0.getOpcode() == Opcode && AssertVT == cast(N0.getOperand(1))->getVT()) return N0; if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && N0.getOperand(0).getOpcode() == Opcode) { // We have an assert, truncate, assert sandwich. Make one stronger assert // by asserting on the smallest asserted type to the larger source type. // This eliminates the later assert: // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN SDLoc DL(N); SDValue BigA = N0.getOperand(0); EVT BigA_AssertVT = cast(BigA.getOperand(1))->getVT(); EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT; SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT); SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), BigA.getOperand(0), MinAssertVTVal); return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); } // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller // than X. Just move the AssertZext in front of the truncate and drop the // AssertSExt. if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::AssertSext && Opcode == ISD::AssertZext) { SDValue BigA = N0.getOperand(0); EVT BigA_AssertVT = cast(BigA.getOperand(1))->getVT(); if (AssertVT.bitsLT(BigA_AssertVT)) { SDLoc DL(N); SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), BigA.getOperand(0), N1); return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); } } return SDValue(); } SDValue DAGCombiner::visitAssertAlign(SDNode *N) { SDLoc DL(N); Align AL = cast(N)->getAlign(); SDValue N0 = N->getOperand(0); // Fold (assertalign (assertalign x, AL0), AL1) -> // (assertalign x, max(AL0, AL1)) if (auto *AAN = dyn_cast(N0)) return DAG.getAssertAlign(DL, N0.getOperand(0), std::max(AL, AAN->getAlign())); // In rare cases, there are trivial arithmetic ops in source operands. Sink // this assert down to source operands so that those arithmetic ops could be // exposed to the DAG combining. 
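  // Worked example (illustrative): for (assertalign (add x, 16), align 8) the
  // constant 16 already provides at least 3 trailing zero bits, so the
  // alignment assertion can be reattached to x and the ADD rebuilt, as the
  // switch below does for ADD/SUB operands.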
switch (N0.getOpcode()) { default: break; case ISD::ADD: case ISD::SUB: { unsigned AlignShift = Log2(AL); SDValue LHS = N0.getOperand(0); SDValue RHS = N0.getOperand(1); unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros(); unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros(); if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) { if (LHSAlignShift < AlignShift) LHS = DAG.getAssertAlign(DL, LHS, AL); if (RHSAlignShift < AlignShift) RHS = DAG.getAssertAlign(DL, RHS, AL); return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS); } break; } } return SDValue(); } /// If the result of a load is shifted/masked/truncated to an effectively /// narrower type, try to transform the load to a narrower type and/or /// use an extending load. SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT ExtVT = VT; // This transformation isn't valid for vector loads. if (VT.isVector()) return SDValue(); // The ShAmt variable is used to indicate that we've consumed a right // shift. I.e. we want to narrow the width of the load by skipping to load the // ShAmt least significant bits. unsigned ShAmt = 0; // A special case is when the least significant bits from the load are masked // away, but using an AND rather than a right shift. HasShiftedOffset is used // to indicate that the narrowed load should be left-shifted ShAmt bits to get // the result. bool HasShiftedOffset = false; // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; ExtVT = cast(N->getOperand(1))->getVT(); } else if (Opc == ISD::SRL || Opc == ISD::SRA) { // Another special-case: SRL/SRA is basically zero/sign-extending a narrower // value, or it may be shifting a higher subword, half or byte into the // lowest bits. // Only handle shift with constant shift amount, and the shiftee must be a // load. auto *LN = dyn_cast(N0); auto *N1C = dyn_cast(N->getOperand(1)); if (!N1C || !LN) return SDValue(); // If the shift amount is larger than the memory type then we're not // accessing any of the loaded bytes. ShAmt = N1C->getZExtValue(); uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits(); if (MemoryWidth <= ShAmt) return SDValue(); // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD. ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD; ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt); // If original load is a SEXTLOAD then we can't simply replace it by a // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD // followed by a ZEXT, but that is not handled at the moment). Similarly if // the original load is a ZEXTLOAD and we want to use a SEXTLOAD. if ((LN->getExtensionType() == ISD::SEXTLOAD || LN->getExtensionType() == ISD::ZEXTLOAD) && LN->getExtensionType() != ExtType) return SDValue(); } else if (Opc == ISD::AND) { // An AND with a constant mask is the same as a truncate + zero-extend. 
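    // Worked example (illustrative): (and (load i32 p), 255) is equivalent to
    // (zext (trunc (load i32 p) to i8) to i32), so a mask of N trailing ones
    // selects ExtVT = iN and a ZEXTLOAD below.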
auto AndC = dyn_cast(N->getOperand(1)); if (!AndC) return SDValue(); const APInt &Mask = AndC->getAPIntValue(); unsigned ActiveBits = 0; if (Mask.isMask()) { ActiveBits = Mask.countTrailingOnes(); } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) { HasShiftedOffset = true; } else { return SDValue(); } ExtType = ISD::ZEXTLOAD; ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); } // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing // a right shift. Here we redo some of those checks, to possibly adjust the // ExtVT even further based on "a masking AND". We could also end up here for // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks // need to be done here as well. if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) { SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0; // Bail out when the SRL has more than one use. This is done for historical // (undocumented) reasons. Maybe intent was to guard the AND-masking below // check below? And maybe it could be non-profitable to do the transform in // case the SRL has multiple uses and we get here with Opc!=ISD::SRL? // FIXME: Can't we just skip this check for the Opc==ISD::SRL case. if (!SRL.hasOneUse()) return SDValue(); // Only handle shift with constant shift amount, and the shiftee must be a // load. auto *LN = dyn_cast(SRL.getOperand(0)); auto *SRL1C = dyn_cast(SRL.getOperand(1)); if (!SRL1C || !LN) return SDValue(); // If the shift amount is larger than the input type then we're not // accessing any of the loaded bytes. If the load was a zextload/extload // then the result of the shift+trunc is zero/undef (handled elsewhere). ShAmt = SRL1C->getZExtValue(); uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits(); if (ShAmt >= MemoryWidth) return SDValue(); // Because a SRL must be assumed to *need* to zero-extend the high bits // (as opposed to anyext the high bits), we can't combine the zextload // lowering of SRL and an sextload. if (LN->getExtensionType() == ISD::SEXTLOAD) return SDValue(); // Avoid reading outside the memory accessed by the original load (could // happened if we only adjust the load base pointer by ShAmt). Instead we // try to narrow the load even further. The typical scenario here is: // (i64 (truncate (i96 (srl (load x), 64)))) -> // (i64 (truncate (i96 (zextload (load i32 + offset) from i32)))) if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) { // Don't replace sextload by zextload. if (ExtType == ISD::SEXTLOAD) return SDValue(); // Narrow the load. ExtType = ISD::ZEXTLOAD; ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt); } // If the SRL is only used by a masking AND, we may be able to adjust // the ExtVT to make the AND redundant. SDNode *Mask = *(SRL->use_begin()); if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND && isa(Mask->getOperand(1))) { const APInt& ShiftMask = Mask->getConstantOperandAPInt(1); if (ShiftMask.isMask()) { EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countTrailingOnes()); // If the mask is smaller, recompute the type. if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) && TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) ExtVT = MaskedVT; } } N0 = SRL.getOperand(0); } // If the load is shifted left (and the result isn't shifted back right), we // can fold a truncate through the shift. 
  // The typical scenario is that N points at a TRUNCATE here so the attempted
  // fold is:
  //   (truncate (shl (load x), c)) -> (shl (narrow load x), c)
  // ShLeftAmt will indicate how much a narrowed load should be shifted left.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Reducing the width of a volatile load is illegal. For atomics, we may be
  // able to reduce the width provided we never widen again. (see D66309)
  if (!LN0->isSimple() || !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits =
        LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // We need to adjust the pointer to the load by ShAmt bits in order to load
  // the correct bytes.
  unsigned PtrAdjustmentInBits =
      DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;

  uint64_t PtrOff = PtrAdjustmentInBits / 8;
  Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getMemBasePlusOffset(
      LN0->getBasePtr(), TypeSize::Fixed(PtrOff), DL, Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getScalarSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT, Result,
                           DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into the lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in
    // the register.
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
return Result; } SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT ExtVT = cast(N1)->getVT(); unsigned VTBits = VT.getScalarSizeInBits(); unsigned ExtVTBits = ExtVT.getScalarSizeInBits(); // sext_vector_inreg(undef) = 0 because the top bit will all be the same. if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // fold (sext_in_reg c1) -> c1 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && ExtVT.bitsLT(cast(N0.getOperand(1))->getVT())) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), N1); // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) // if x is small enough or if we know that x has more than 1 sign bit and the // sign_extend_inreg is extending from one of them. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); if ((N00Bits <= ExtVTBits || DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x) // if x is small enough or if we know that x has more than 1 sign bit and the // sign_extend_inreg is extending from one of them. if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); unsigned DstElts = N0.getValueType().getVectorMinNumElements(); unsigned SrcElts = N00.getValueType().getVectorMinNumElements(); bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG; APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts); if ((N00Bits == ExtVTBits || (!IsZext && (N00Bits < ExtVTBits || DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); } // fold (sext_in_reg (zext x)) -> (sext x) // iff we are extending the source sign bit. if (N0.getOpcode() == ISD::ZERO_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getScalarValueSizeInBits() == ExtVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1))) return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (sext_in_reg (load x)) -> (smaller sextload x) // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) if (SDValue NarrowLoad = reduceLoadWidth(N)) return NarrowLoad; // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. 
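  // Illustrative note: with a 32-bit X, "(sext_in_reg (srl X, 23), i8) ->
  // (sra X, 23)" is only valid when X has at least 2 sign bits, i.e. bit 31
  // equals bit 30, so the SRA fills the high bits with the same value the
  // sext_in_reg would; the ComputeNumSignBits check below verifies this.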
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { if (auto *ShAmt = dyn_cast(N0.getOperand(1))) if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1)); } } // fold (sext_inreg (extload x)) -> (sextload x) // If sextload is not supported by target, we can only do the combine when // load has one use. Doing otherwise can block folding the extload with other // extends that the target does support. if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && ExtVT == cast(N0)->getMemoryVT() && ((!LegalOperations && cast(N0)->isSimple() && N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorklist(ExtLoad.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && ExtVT == cast(N0)->getMemoryVT() && ((!LegalOperations && cast(N0)->isSimple()) && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold (sext_inreg (masked_load x)) -> (sext_masked_load x) // ignore it if the masked load is already sign extended if (MaskedLoadSDNode *Ld = dyn_cast(N0)) { if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() && Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) { SDValue ExtMaskedLoad = DAG.getMaskedLoad( VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(), Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad()); CombineTo(N, ExtMaskedLoad); CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x) if (auto *GN0 = dyn_cast(N0)) { if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() && TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) { SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(), GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; SDValue ExtLoad = DAG.getMaskedGather( DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops, GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorklist(ExtLoad.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } return SDValue(); } SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same. if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; if (SimplifyDemandedVectorElts(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SrcVT = N0.getValueType(); bool isLE = DAG.getDataLayout().isLittleEndian(); // noop truncate if (SrcVT == VT) return N0; // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // fold (truncate c1) -> c1 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); if (C.getNode() != N) return C; } // fold (truncate (ext x)) -> (ext x) or (truncate x) or x if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { // if the source is smaller than the dest, we still need an extend. if (N0.getOperand(0).getValueType().bitsLT(VT)) return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); // if the source is larger than the dest, than we just need the truncate. if (N0.getOperand(0).getValueType().bitsGT(VT)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. return N0.getOperand(0); } // Try to narrow a truncate-of-sext_in_reg to the destination type: // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) { SDValue X = N0.getOperand(0); SDValue ExtVal = N0.getOperand(1); EVT ExtVT = cast(ExtVal)->getVT(); if (ExtVT.bitsLT(VT)) { SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal); } } // If this is anyext(trunc), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) return SDValue(); // Fold extract-and-trunc into a narrow extract. For example: // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) // i32 y = TRUNCATE(i64 x) // -- becomes -- // v16i8 b = BITCAST (v2i64 val) // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) // // Note: We only run this optimization after type legalization (which often // creates this pattern) and before operation legalization after which // we need to be more careful about the vector instructions that we generate. 
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { EVT VecTy = N0.getOperand(0).getValueType(); EVT ExTy = N0.getValueType(); EVT TrTy = N->getValueType(0); auto EltCnt = VecTy.getVectorElementCount(); unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); auto NewEltCnt = EltCnt * SizeRatio; EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt); assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); SDValue EltNo = N0->getOperand(1); if (isa(EltNo) && isTypeLegal(NVT)) { int Elt = cast(EltNo)->getZExtValue(); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, DAG.getBitcast(NVT, N0.getOperand(0)), DAG.getVectorIdxConstant(Index, DL)); } } // trunc (select c, a, b) -> select c, (trunc a), (trunc b) if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && TLI.isTruncateFree(SrcVT, VT)) { SDLoc SL(N0); SDValue Cond = N0.getOperand(0); SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); } } // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { SDValue Amt = N0.getOperand(1); KnownBits Known = DAG.computeKnownBits(Amt); unsigned Size = VT.getScalarSizeInBits(); if (Known.countMaxActiveBits() <= Log2_32(Size)) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); if (AmtVT != Amt.getValueType()) { Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT); AddToWorklist(Amt.getNode()); } return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt); } } if (SDValue V = foldSubToUSubSat(VT, N0.getNode())) return V; // Attempt to pre-truncate BUILD_VECTOR sources. if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && // Avoid creating illegal types if running after type legalizer. (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) { SDLoc DL(N); EVT SVT = VT.getScalarType(); SmallVector TruncOps; for (const SDValue &Op : N0->op_values()) { SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op); TruncOps.push_back(TruncOp); } return DAG.getBuildVector(VT, DL, TruncOps); } // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to // (2xi32 (buildvector x, y)). if (Level == AfterLegalizeVectorOps && VT.isVector() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && N0.getOperand(0).hasOneUse()) { SDValue BuildVect = N0.getOperand(0); EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); EVT TruncVecEltTy = VT.getVectorElementType(); // Check that the element types match. if (BuildVectEltTy == TruncVecEltTy) { // Now we only need to compute the offset of the truncated elements. 
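      // Worked example (illustrative): for the fold quoted above,
      // BuildVecNumElts = 4 and TruncVecNumElts = 2, so every second operand
      // (x and y) is kept to form the narrower build_vector.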
unsigned BuildVecNumElts = BuildVect.getNumOperands(); unsigned TruncVecNumElts = VT.getVectorNumElements(); unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; assert((BuildVecNumElts % TruncVecNumElts) == 0 && "Invalid number of elements"); SmallVector Opnds; for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); return DAG.getBuildVector(VT, SDLoc(N), Opnds); } } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { if (SDValue Reduced = reduceLoadWidth(N)) return Reduced; // Handle the case where the load remains an extending load even // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); return NewLoad; } } } // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { SmallVector VTs; SDValue V; unsigned Idx = 0; unsigned NumDefs = 0; for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { SDValue X = N0.getOperand(i); if (!X.isUndef()) { V = X; Idx = i; NumDefs++; } // Stop if more than one members are non-undef. if (NumDefs > 1) break; VTs.push_back(EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), X.getValueType().getVectorElementCount())); } if (NumDefs == 0) return DAG.getUNDEF(VT); if (NumDefs == 1) { assert(V.getNode() && "The single defined operand is empty!"); SmallVector Opnds; for (unsigned i = 0, e = VTs.size(); i != e; ++i) { if (i != Idx) { Opnds.push_back(DAG.getUNDEF(VTs[i])); continue; } SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); AddToWorklist(NV.getNode()); Opnds.push_back(NV); } return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); } } // Fold truncate of a bitcast of a vector to an extract of the low vector // element. // // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue VecSrc = N0.getOperand(0); EVT VecSrcVT = VecSrc.getValueType(); if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT && (!LegalOperations || TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) { SDLoc SL(N); unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc, DAG.getVectorIdxConstant(Idx, SL)); } } // Simplify the operands using demanded-bits information. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y // Currently we only perform this optimization on scalars because vectors // may have different active low bits. 
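  // Worked example (illustrative): for (i8 (trunc (or (shl i32:x, 8), i32:y)))
  // only bits 7:0 are demanded, and (shl x, 8) contributes nothing to them, so
  // the input of the truncate simplifies to y.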
if (!VT.isVector()) { APInt Mask = APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (extract_subvector(ext x))) -> // (extract_subvector x) // TODO: This can be generalized to cover cases where the truncate and extract // do not fully cancel each other out. if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::SIGN_EXTEND || N00.getOpcode() == ISD::ZERO_EXTEND || N00.getOpcode() == ISD::ANY_EXTEND) { if (N00.getOperand(0)->getValueType(0).getVectorElementType() == VT.getVectorElementType()) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT, N00.getOperand(0), N0.getOperand(1)); } } if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; // Narrow a suitable binary operation with a non-opaque constant operand by // moving it ahead of the truncate. This is limited to pre-legalization // because targets may prefer a wider type during later combines and invert // this transform. switch (N0.getOpcode()) { case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::AND: case ISD::OR: case ISD::XOR: if (!LegalOperations && N0.hasOneUse() && (isConstantOrConstantVector(N0.getOperand(0), true) || isConstantOrConstantVector(N0.getOperand(1), true))) { // TODO: We already restricted this to pre-legalization, but for vectors // we are extra cautious to not create an unsupported operation. // Target-specific changes are likely needed to avoid regressions here. if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) { SDLoc DL(N); SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR); } } break; case ISD::ADDE: case ISD::ADDCARRY: // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) // When the adde's carry is not used. // We only do for addcarry before legalize operation if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) || TLI.isOperationLegal(N0.getOpcode(), VT)) && N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) { SDLoc DL(N); SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1)); return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2)); } break; case ISD::USUBSAT: // Truncate the USUBSAT only if LHS is a known zero-extension, its not // enough to know that the upper bits are zero we must ensure that we don't // introduce an extra truncate. if (!LegalOperations && N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <= VT.getScalarSizeInBits() && hasOperation(N0.getOpcode(), VT)) { return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1), DAG, SDLoc(N)); } break; } return SDValue(); } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { SDValue Elt = N->getOperand(i); if (Elt.getOpcode() != ISD::MERGE_VALUES) return Elt.getNode(); return Elt.getOperand(Elt.getResNo()).getNode(); } /// build_pair (load, load) -> load /// if load locations are consecutive. 
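/// Worked example (illustrative): on a little-endian target,
///   (build_pair (i32 load [p]), (i32 load [p+4])) -> (i64 load [p])
/// Element 0 of a BUILD_PAIR is the least significant half, which is also the
/// half at the lower address on little-endian, so no swap is needed there;
/// big-endian targets swap LD1/LD2 below.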
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
      !LD1->hasOneUse() || !LD2->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();

  bool LD1Fast = false;
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             *LD1->getMemOperand(), &LD1Fast) &&
      LD1Fast)
    return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                       LD1->getPointerInfo(), LD1->getAlign());

  return SDValue();
}

static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
  // and Lo parts; on big-endian machines it doesn't.
  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}

static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    return SDValue();

  // TODO: Handle cases where the integer constant is a different scalar
  // bitwidth to the FP.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
    return SDValue();

  unsigned FPOpcode;
  APInt SignMask;
  switch (N0.getOpcode()) {
  case ISD::AND:
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::XOR:
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::OR:
    FPOpcode = ISD::FABS;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  default:
    return SDValue();
  }

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
  //   fneg (fabs X)
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0.getOperand(0).getValueType() == VT) {
    SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
    NumFPLogicOpsConv++;
    if (N0.getOpcode() == ISD::OR)
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
    return FPOp;
  }

  return SDValue();
}

SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
  // First check to see if this is all constant.
  // TODO: Support FP bitcasts after legalize types.
if (VT.isVector() && (!LegalTypes || (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() && TLI.isTypeLegal(VT.getVectorElementType()))) && N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() && cast(N0)->isConstant()) return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), VT.getVectorElementType()); // If the input is a constant, let getNode fold it. if (isIntOrFPConstant(N0)) { // If we can't allow illegal operations, we need to check that this is just // a fp -> int or int -> conversion and that the resulting operation will // be legal. if (!LegalOperations || (isa(N0) && VT.isFloatingPoint() && !VT.isVector() && TLI.isOperationLegal(ISD::ConstantFP, VT)) || (isa(N0) && VT.isInteger() && !VT.isVector() && TLI.isOperationLegal(ISD::Constant, VT))) { SDValue C = DAG.getBitcast(VT, N0); if (C.getNode() != N) return C; } } // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) return DAG.getBitcast(VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) // If the resultant load doesn't need a higher alignment than the original! if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not remove the cast if the types differ in endian layout. TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && // If the load is volatile, we only want to change the load type if the // resulting load is legal. Otherwise we might increase the number of // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. ((!LegalOperations && cast(N0)->isSimple()) || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast(N0); if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG, *LN0->getMemOperand())) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->getAlign(), LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } } if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) return V; // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // // For ppc_fp128: // fold (bitcast (fneg x)) -> // flipbit = signbit // (xor (bitcast x) (build_pair flipbit, flipbit)) // // fold (bitcast (fabs x)) -> // flipbit = (and (extract_element (bitcast x), 0), signbit) // (xor (bitcast x) (build_pair flipbit, flipbit)) // This often reduces constant pool loads. 
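  // Worked example (illustrative):
  //   (i64 (bitcast (fneg f64:x)))
  //     -> (xor (i64 (bitcast x)), 0x8000000000000000)
  // Only the sign bit is flipped in the integer domain, so no FP constant
  // needs to be materialized.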
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(NewConv.getNode()); SDLoc DL(N); if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { assert(VT.getSizeInBits() == 128); SDValue SignBit = DAG.getConstant( APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); SDValue FlipBit; if (N0.getOpcode() == ISD::FNEG) { FlipBit = SignBit; AddToWorklist(FlipBit.getNode()); } else { assert(N0.getOpcode() == ISD::FABS); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), SDLoc(NewConv))); AddToWorklist(Hi.getNode()); FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); AddToWorklist(FlipBit.getNode()); } SDValue FlipBits = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); } APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, NewConv, DAG.getConstant(SignBit, DL, VT)); assert(N0.getOpcode() == ISD::FABS); return DAG.getNode(ISD::AND, DL, VT, NewConv, DAG.getConstant(~SignBit, DL, VT)); } // fold (bitconvert (fcopysign cst, x)) -> // (or (and (bitconvert x), sign), (and cst, (not sign))) // Note that we don't handle (copysign x, cst) because this can always be // folded to an fneg or fabs. // // For ppc_fp128: // fold (bitcast (fcopysign cst, x)) -> // flipbit = (and (extract_element // (xor (bitcast cst), (bitcast x)), 0), // signbit) // (xor (bitcast cst) (build_pair flipbit, flipbit)) if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() && isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); AddToWorklist(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); AddToWorklist(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. 
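        // Worked example (illustrative): with OrigXWidth = 64 and VT = i32,
        // the value is shifted right by 32 so that the sign bit of the 64-bit
        // value lands in bit 31 of the i32 produced by the truncate just
        // below.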
SDLoc DL(X); X = DAG.getNode(ISD::SRL, DL, X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, DL, X.getValueType())); AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorklist(X.getNode()); } if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); AddToWorklist(X.getNode()); SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); AddToWorklist(XorResult.getNode()); SDValue XorResult64 = DAG.getNode( ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), SDLoc(XorResult))); AddToWorklist(XorResult64.getNode()); SDValue FlipBit = DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); AddToWorklist(FlipBit.getNode()); SDValue FlipBits = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); } APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } } // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. if (N0.getOpcode() == ISD::BUILD_PAIR) if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) return CombineLD; // Remove double bitcasts from shuffles - this is often a legacy of // XformToShuffleWithZero being used to combine bitmaskings (of // float vectors bitcast to integer vectors) into shuffles. // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() && VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { ShuffleVectorSDNode *SVN = cast(N0); // If operands are a bitcast, peek through if it casts the original VT. // If operands are a constant, just bitcast back to original VT. auto PeekThroughBitcast = [&](SDValue Op) { if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); if (Op.isUndef() || isAnyConstantBuildVector(Op)) return DAG.getBitcast(VT, Op); return SDValue(); }; // FIXME: If either input vector is bitcast, try to convert the shuffle to // the result type of this bitcast. This would eliminate at least one // bitcast. See the transform in InstCombine. SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); if (!(SV0 && SV1)) return SDValue(); int MaskScale = VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); SmallVector NewMask; for (int M : SVN->getMask()) for (int i = 0; i != MaskScale; ++i) NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); SDValue LegalShuffle = TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG); if (LegalShuffle) return LegalShuffle; } return SDValue(); } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { EVT VT = N->getValueType(0); return CombineConsecutiveLoads(N, VT); } SDValue DAGCombiner::visitFREEZE(SDNode *N) { SDValue N0 = N->getOperand(0); if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false)) return N0; // Fold freeze(bitcast(x)) -> bitcast(freeze(x)). // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold. if (N0.getOpcode() == ISD::BITCAST) return DAG.getBitcast(N->getValueType(0), DAG.getNode(ISD::FREEZE, SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0))); return SDValue(); } /// We know that BV is a build_vector node with Constant, ConstantFP or Undef /// operands. DstEltVT indicates the destination element value type. SDValue DAGCombiner:: ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. if (SrcEltVT == DstEltVT) return SDValue(BV, 0); unsigned SrcBitSize = SrcEltVT.getSizeInBits(); unsigned DstBitSize = DstEltVT.getSizeInBits(); // If this is a conversion of N elements of one type to N elements of another // type, convert each element. This handles FP<->INT cases. if (SrcBitSize == DstBitSize) { SmallVector Ops; for (SDValue Op : BV->op_values()) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); Ops.push_back(DAG.getBitcast(DstEltVT, Op)); AddToWorklist(Ops.back().getNode()); } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, BV->getValueType(0).getVectorNumElements()); return DAG.getBuildVector(VT, SDLoc(BV), Ops); } // Otherwise, we're growing or shrinking the elements. To avoid having to // handle annoying details of growing/shrinking FP values, we convert them to // int first. if (SrcEltVT.isFloatingPoint()) { // Convert the input float vector to a int vector where the elements are the // same sizes. EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } // Now we know the input is an integer vector. If the output is a FP type, // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } // Okay, we know the src/dst types are both integers of differing types. assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a // BuildVectorSDNode? auto *BVN = cast(BV); // Extract the constant raw bit data. 
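  // Worked example (illustrative, made-up constants): bitcasting a constant
  // v2i32 {0x11223344, 0x55667788} to v1i64 on a little-endian target
  // concatenates the raw bits into the single element 0x5566778811223344;
  // undef source elements are tracked through UndefElements below.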
BitVector UndefElements; SmallVector RawBits; bool IsLE = DAG.getDataLayout().isLittleEndian(); if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) return SDValue(); SDLoc DL(BV); SmallVector Ops; for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { if (UndefElements[I]) Ops.push_back(DAG.getUNDEF(DstEltVT)); else Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT)); } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); return DAG.getBuildVector(VT, DL, Ops); } // Returns true if floating point contraction is allowed on the FMUL-SDValue // `N` static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { assert(N.getOpcode() == ISD::FMUL); return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || N->getFlags().hasAllowContract(); } // Returns true if `N` can assume no infinities involved in its computation. static bool hasNoInfs(const TargetOptions &Options, SDValue N) { return Options.NoInfsFPMath || N->getFlags().hasNoInfs(); } /// Try to perform FMA combining on a given FADD node. SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); bool CanReassociate = Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || HasFMAD); // If the addition is not contractable, do not combine. if (!AllowFusionGlobally && !N->getFlags().hasAllowContract()) return SDValue(); if (TLI.generateFMAsInMachineCombiner(VT, OptLevel)) return SDValue(); // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); auto isFusedOp = [&](SDValue N) { unsigned Opcode = N.getOpcode(); return Opcode == ISD::FMA || Opcode == ISD::FMAD; }; // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { if (N.getOpcode() != ISD::FMUL) return false; return AllowFusionGlobally || N->getFlags().hasAllowContract(); }; // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) { if (N0->use_size() > N1->use_size()) std::swap(N0, N1); } // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), N1); } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), N0); } // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) // This requires reassociation because it changes the order of operations. 
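  // Illustrative note: the rewrite evaluates A*B + (C*D + E) instead of
  // (A*B + C*D) + E, which may round differently, hence the CanReassociate
  // guard applied below.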
SDValue FMA, E; if (CanReassociate && isFusedOp(N0) && N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && N0.getOperand(2).hasOneUse()) { FMA = N0; E = N1; } else if (CanReassociate && isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && N1.getOperand(2).hasOneUse()) { FMA = N1; E = N0; } if (FMA && E) { SDValue A = FMA.getOperand(0); SDValue B = FMA.getOperand(1); SDValue C = FMA.getOperand(2).getOperand(0); SDValue D = FMA.getOperand(2).getOperand(1); SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E); return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE); } // Look through FP_EXTEND nodes to do more combining. // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1); } } // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N10.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0); } } // More folding opportunities when target permits. if (Aggressive) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z)); }; if (isFusedOp(N0)) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (isContractableFMUL(N020) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N020.getValueType())) { return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), N020.getOperand(0), N020.getOperand(1), N1); } } } // fold (fadd (fpext (fma x, y, (fmul u, v))), z) // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. 
auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { return DAG.getNode( PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X), DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z)); }; if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isFusedOp(N00)) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), N002.getOperand(0), N002.getOperand(1), N1); } } } // fold (fadd x, (fma y, z, (fpext (fmul u, v))) // -> (fma y, z, (fma (fpext u), (fpext v), x)) if (isFusedOp(N1)) { SDValue N12 = N1.getOperand(2); if (N12.getOpcode() == ISD::FP_EXTEND) { SDValue N120 = N12.getOperand(0); if (isContractableFMUL(N120) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N120.getValueType())) { return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), N120.getOperand(0), N120.getOperand(1), N0); } } } // fold (fadd x, (fpext (fma y, z, (fmul u, v))) // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isFusedOp(N10)) { SDValue N102 = N10.getOperand(2); if (isContractableFMUL(N102) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N10.getValueType())) { return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), N102.getOperand(0), N102.getOperand(1), N0); } } } } return SDValue(); } /// Try to perform FMA combining on a given FSUB node. SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); const SDNodeFlags Flags = N->getFlags(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || HasFMAD); // If the subtraction is not contractable, do not combine. if (!AllowFusionGlobally && !N->getFlags().hasAllowContract()) return SDValue(); if (TLI.generateFMAsInMachineCombiner(VT, OptLevel)) return SDValue(); // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros(); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. 
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { if (N.getOpcode() != ISD::FMUL) return false; return AllowFusionGlobally || N->getFlags().hasAllowContract(); }; // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) { if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0), XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z)); } return SDValue(); }; // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) { if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)), YZ.getOperand(1), X); } return SDValue(); }; // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. if (isContractableFMUL(N0) && isContractableFMUL(N1) && (N0->use_size() > N1->use_size())) { // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b)) if (SDValue V = tryToFoldXSubYZ(N0, N1)) return V; // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d))) if (SDValue V = tryToFoldXYSubZ(N0, N1)) return V; } else { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) if (SDValue V = tryToFoldXYSubZ(N0, N1)) return V; // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) if (SDValue V = tryToFoldXSubYZ(N0, N1)) return V; } // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) && (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { SDValue N00 = N0.getOperand(0).getOperand(0); SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N00), N01, DAG.getNode(ISD::FNEG, SL, VT, N1)); } // Look through FP_EXTEND nodes to do more combining. // fold (fsub (fpext (fmul x, y)), z) // -> (fma (fpext x), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, N1)); } } // fold (fsub x, (fpext (fmul y, z))) // -> (fma (fneg (fpext y)), (fpext z), x) // Note: Commutes FSUB operands. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N10.getValueType())) { return DAG.getNode( PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))), DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0); } } // fold (fsub (fpext (fneg (fmul, x, y))), z) // -> (fneg (fma (fpext x), (fpext y), z)) // Note: This could be removed with appropriate canonicalization of the // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent // from implementing the canonicalization in visitFSUB. 
if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FNEG) { SDValue N000 = N00.getOperand(0); if (isContractableFMUL(N000) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode( ISD::FNEG, SL, VT, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)), N1)); } } } // fold (fsub (fneg (fpext (fmul, x, y))), z) // -> (fneg (fma (fpext x)), (fpext y), z) // Note: This could be removed with appropriate canonicalization of the // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent // from implementing the canonicalization in visitFSUB. if (N0.getOpcode() == ISD::FNEG) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FP_EXTEND) { SDValue N000 = N00.getOperand(0); if (isContractableFMUL(N000) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N000.getValueType())) { return DAG.getNode( ISD::FNEG, SL, VT, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)), N1)); } } } auto isReassociable = [Options](SDNode *N) { return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); }; auto isContractableAndReassociableFMUL = [isContractableFMUL, isReassociable](SDValue N) { return isContractableFMUL(N) && isReassociable(N.getNode()); }; auto isFusedOp = [&](SDValue N) { unsigned Opcode = N.getOpcode(); return Opcode == ISD::FMA || Opcode == ISD::FMAD; }; // More folding opportunities when target permits. if (Aggressive && isReassociable(N)) { bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract(); // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) if (CanFuse && isFusedOp(N0) && isContractableAndReassociableFMUL(N0.getOperand(2)) && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, N1))); } // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) if (CanFuse && isFusedOp(N1) && isContractableAndReassociableFMUL(N1.getOperand(2)) && N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); return DAG.getNode( PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0)); } // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) if (isFusedOp(N0) && N0->hasOneUse()) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (isContractableAndReassociableFMUL(N020) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N020.getValueType())) { return DAG.getNode( PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode( PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, N1))); } } } // fold (fsub (fpext (fma x, y, (fmul u, v))), z) // -> (fma (fpext x), (fpext y), // (fma (fpext 
u), (fpext v), (fneg z))) // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isFusedOp(N00)) { SDValue N002 = N00.getOperand(2); if (isContractableAndReassociableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode( PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), DAG.getNode( PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, N1))); } } } // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && N1->hasOneUse()) { SDValue N120 = N1.getOperand(2).getOperand(0); if (isContractableAndReassociableFMUL(N120) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N120.getValueType())) { SDValue N1200 = N120.getOperand(0); SDValue N1201 = N120.getOperand(1); return DAG.getNode( PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0)); } } // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) // -> (fma (fneg (fpext y)), (fpext z), // (fma (fneg (fpext u)), (fpext v), x)) // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) { SDValue CvtSrc = N1.getOperand(0); SDValue N100 = CvtSrc.getOperand(0); SDValue N101 = CvtSrc.getOperand(1); SDValue N102 = CvtSrc.getOperand(2); if (isContractableAndReassociableFMUL(N102) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, CvtSrc.getValueType())) { SDValue N1020 = N102.getOperand(0); SDValue N1021 = N102.getOperand(1); return DAG.getNode( PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0)); } } } return SDValue(); } /// Try to perform FMA combining on a given FMUL node based on the distributive /// law x * (y + 1) = x * y + x and variants thereof (commuted versions, /// subtraction instead of addition). SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc SL(N); assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); const TargetOptions &Options = DAG.getTarget().Options; // The transforms below are incorrect when x == 0 and y == inf, because the // intermediate multiplication produces a nan. SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1; if (!hasNoInfs(Options, FAdd)) return SDValue(); // Floating-point multiply-add without intermediate rounding. 
bool HasFMA = isContractableFMUL(Options, SDValue(N, 0)) && TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // Floating-point multiply-add with intermediate rounding. This can result // in a less precise result due to the changed rounding order. bool HasFMAD = Options.UnsafeFPMath && (LegalOperations && TLI.isFMADLegal(DAG, N)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y) // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y)) auto FuseFADD = [&](SDValue X, SDValue Y) { if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) { if (C->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); if (C->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); } } return SDValue(); }; if (SDValue FMA = FuseFADD(N0, N1)) return FMA; if (SDValue FMA = FuseFADD(N1, N0)) return FMA; // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y) // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y)) // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y)) // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y) auto FuseFSUB = [&](SDValue X, SDValue Y) { if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) { if (C0->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, Y); if (C0->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); } if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) { if (C1->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); if (C1->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); } } return SDValue(); }; if (SDValue FMA = FuseFSUB(N0, N1)) return FMA; if (SDValue FMA = FuseFSUB(N1, N0)) return FMA; return SDValue(); } SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; // fold (fadd c1, c2) -> c1 + c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, DL, VT, N1, N0); // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // 
fold (fadd A, (fneg B)) -> (fsub A, B) if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) if (SDValue NegN1 = TLI.getCheaperNegatedExpression( N1, DAG, LegalOperations, ForCodeSize)) return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1); // fold (fadd (fneg A), B) -> (fsub B, A) if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) if (SDValue NegN0 = TLI.getCheaperNegatedExpression( N0, DAG, LegalOperations, ForCodeSize)) return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) return false; auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true); return C && C->isExactlyValue(-2.0); }; // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B) if (isFMulNegTwo(N0)) { SDValue B = N0.getOperand(0); SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B); return DAG.getNode(ISD::FSUB, DL, VT, N1, Add); } // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B) if (isFMulNegTwo(N1)) { SDValue B = N1.getOperand(0); SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B); return DAG.getNode(ISD::FSUB, DL, VT, N0, Add); } // No FP constant should be created after legalization as Instruction // Selection pass has a hard time dealing with FP constants. bool AllowNewConst = (Level < AfterLegalizeDAG); // If nnan is enabled, fold lots of things. if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) { // If allowed, fold (fadd (fneg x), x) -> 0.0 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, DL, VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, DL, VT); } // If 'unsafe math' or reassoc and nsz, fold lots of things. // TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 if (N1CFP && N0.getOpcode() == ISD::FADD && DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1); return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC); } // We can fold chains of FADD's of the same value into multiplications. // This transform is not safe in general because we are reducing the number // of rounding steps. 
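// For example, the (fadd (fmul x, c), x) -> (fmul x, c+1) rewrite below
// evaluates with a different rounding sequence than the original expression,
// so the result can differ in the last ulp; hence the unsafe/reassoc gating
// above. (Illustrative note, not an exhaustive list of the cases handled.)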
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), DAG.getConstantFP(1.0, DL, VT)); return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), DAG.getConstantFP(2.0, DL, VT)); return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); } } if (N1.getOpcode() == ISD::FMUL) { bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), DAG.getConstantFP(1.0, DL, VT)); return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), DAG.getConstantFP(2.0, DL, VT)); return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); } } if (N0.getOpcode() == ISD::FADD) { bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { return DAG.getNode(ISD::FMUL, DL, VT, N1, DAG.getConstantFP(3.0, DL, VT)); } } if (N1.getOpcode() == ISD::FADD) { bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getConstantFP(3.0, DL, VT)); } } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); } } } // enable-unsafe-fp-math // FADD -> FMA combines: if (SDValue Fused = visitFADDForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } return SDValue(); } SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue N0 = N->getOperand(1); SDValue N1 = N->getOperand(2); EVT VT = N->getValueType(0); EVT ChainVT = N->getValueType(1); SDLoc DL(N); SelectionDAG::FlagInserter FlagsInserter(DAG, N); // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B) if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT)) if (SDValue NegN1 = TLI.getCheaperNegatedExpression( N1, DAG, LegalOperations, ForCodeSize)) { return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT), {Chain, N0, NegN1}); } // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A) if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT)) if (SDValue NegN0 = TLI.getCheaperNegatedExpression( N0, DAG, LegalOperations, 
ForCodeSize)) { return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT), {Chain, N1, NegN0}); } return SDValue(); } SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; // fold (fsub c1, c2) -> c1-c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { return N0; } } if (N0 == N1) { // (fsub x, x) -> 0.0 if (Options.NoNaNsFPMath || Flags.hasNoNaNs()) return DAG.getConstantFP(0.0f, DL, VT); } // (fsub -0.0, N1) -> -N1 if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are // flushed to zero, unless all users treat denorms as zero (DAZ). // FIXME: This transform will change the sign of a NaN and the behavior // of a signaling NaN. It is only valid when a NoNaN flag is present. DenormalMode DenormMode = DAG.getDenormalMode(VT); if (DenormMode == DenormalMode::getIEEE()) { if (SDValue NegN1 = TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) return NegN1; if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1); } } } if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1)); // X - (Y + X) -> -Y if (N0 == N1->getOperand(1)) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0)); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (SDValue NegN1 = TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } return SDValue(); } SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; // fold (fmul c1, c2) -> c1*c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { // fmul 
(fmul X, C1), C2 -> fmul X, C1 * C2 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) && N0.getOpcode() == ISD::FMUL) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); // Avoid an infinite loop by making sure that N00 is not a constant // (the inner multiply has not been constant folded yet). if (DAG.isConstantFPBuildVectorOrConstantFP(N01) && !DAG.isConstantFPBuildVectorOrConstantFP(N00)) { SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); } } // Match a special-case: we convert X * 2.0 into fadd. // fmul (fadd X, X), C -> fmul X, 2.0 * C if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && N0.getOperand(0) == N0.getOperand(1)) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, DL, VT, N0, N0); // fold (fmul X, -1.0) -> (fsub -0.0, X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) { if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) { return DAG.getNode(ISD::FSUB, DL, VT, DAG.getConstantFP(-0.0, DL, VT), N0, Flags); } } // -N0 * -N1 --> N0 * N1 TargetLowering::NegatibleCost CostN0 = TargetLowering::NegatibleCost::Expensive; TargetLowering::NegatibleCost CostN1 = TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); SDValue NegN1 = TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); if (NegN0 && NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1); // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() && (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) && TLI.isOperationLegal(ISD::FABS, VT)) { SDValue Select = N0, X = N1; if (Select.getOpcode() != ISD::SELECT) std::swap(Select, X); SDValue Cond = Select.getOperand(0); auto TrueOpnd = dyn_cast(Select.getOperand(1)); auto FalseOpnd = dyn_cast(Select.getOperand(2)); if (TrueOpnd && FalseOpnd && Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X && isa(Cond.getOperand(1)) && cast(Cond.getOperand(1))->isExactlyValue(0.0)) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); switch (CC) { default: break; case ISD::SETOLT: case ISD::SETULT: case ISD::SETOLE: case ISD::SETULE: case ISD::SETLT: case ISD::SETLE: std::swap(TrueOpnd, FalseOpnd); LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETUGT: case ISD::SETOGE: case ISD::SETUGE: case ISD::SETGT: case ISD::SETGE: if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) && TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, DAG.getNode(ISD::FABS, DL, VT, X)); if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0)) return DAG.getNode(ISD::FABS, DL, VT, X); break; } } } // FMUL -> FMA combines: if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } return SDValue(); } SDValue DAGCombiner::visitFMA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); SDLoc 
DL(N); const TargetOptions &Options = DAG.getTarget().Options; // FMA nodes have flags that propagate to the created nodes. SelectionDAG::FlagInserter FlagsInserter(DAG, N); bool CanReassociate = Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); // Constant fold FMA. if (isa(N0) && isa(N1) && isa(N2)) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } // (-N0 * -N1) + N2 --> (N0 * N1) + N2 TargetLowering::NegatibleCost CostN0 = TargetLowering::NegatibleCost::Expensive; TargetLowering::NegatibleCost CostN1 = TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); SDValue NegN1 = TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); if (NegN0 && NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); // FIXME: use fast math flags instead of Options.UnsafeFPMath if (Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; if (N1CFP && N1CFP->isZero()) return N2; } if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); if (CanReassociate) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && DAG.isConstantFPBuildVectorOrConstantFP(N1) && DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1))); } // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) if (N0.getOpcode() == ISD::FMUL && DAG.isConstantFPBuildVectorOrConstantFP(N1) && DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2); } } // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, DL, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); AddToWorklist(RHSNeg.getNode()); return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); } // fma (fneg x), K, y -> fma x -K, y if (N0.getOpcode() == ISD::FNEG && (TLI.isOperationLegal(ISD::ConstantFP, VT) || (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) { return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, DL, VT, N1), N2); } } if (CanReassociate) { // (fma x, c, x) -> (fmul x, (c+1)) if (N1CFP && N0 == N2) { return DAG.getNode( ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT))); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { return DAG.getNode( ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT))); } } // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) if (!TLI.isFNegFree(VT)) if (SDValue Neg = TLI.getCheaperNegatedExpression( SDValue(N, 0), DAG, LegalOperations, ForCodeSize)) return DAG.getNode(ISD::FNEG, DL, VT, Neg); return SDValue(); } // 
Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) // Notice that this is not always beneficial. One reason is different targets // may have different costs for FDIV and FMUL, so sometimes the cost of two // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { // TODO: Limit this transform based on optsize/minsize - it always creates at // least 1 extra instruction. But the perf win may be substantial enough // that only minsize should restrict this. bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; const SDNodeFlags Flags = N->getFlags(); if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal())) return SDValue(); // Skip if current node is a reciprocal/fneg-reciprocal. SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true); if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0))) return SDValue(); // Exit early if the target does not want this transform or if there can't // possibly be enough uses of the divisor to make the transform worthwhile. unsigned MinUses = TLI.combineRepeatedFPDivisors(); // For splat vectors, scale the number of uses by the splat factor. If we can // convert the division into a scalar op, that will likely be much faster. unsigned NumElts = 1; EVT VT = N->getValueType(0); if (VT.isVector() && DAG.isSplatValue(N1)) NumElts = VT.getVectorMinNumElements(); if (!MinUses || (N1->use_size() * NumElts) < MinUses) return SDValue(); // Find all FDIV users of the same divisor. // Use a set because duplicates may be present in the user list. SetVector Users; for (auto *U : N1->uses()) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet. if (U->getOperand(1).getOpcode() == ISD::FSQRT && U->getOperand(0) == U->getOperand(1).getOperand(0) && U->getFlags().hasAllowReassociation() && U->getFlags().hasNoSignedZeros()) continue; // This division is eligible for optimization only if global unsafe math // is enabled or if this division allows reciprocal formation. if (UnsafeMath || U->getFlags().hasAllowReciprocal()) Users.insert(U); } } // Now that we have the actual number of divisor uses, make sure it meets // the minimum threshold specified by the target. if ((Users.size() * NumElts) < MinUses) return SDValue(); SDLoc DL(N); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); // Dividend / Divisor -> Dividend * Reciprocal for (auto *U : Users) { SDValue Dividend = U->getOperand(0); if (Dividend != FPOne) { SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, Reciprocal, Flags); CombineTo(U, NewNode); } else if (U != Reciprocal.getNode()) { // In the absence of fast-math-flags, this user node is always the // same node as Reciprocal, but with FMF they may be different nodes. CombineTo(U, Reciprocal); } } return SDValue(N, 0); // N was replaced. 
} SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; // fold (fdiv c1, c2) -> c1/c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; if (SDValue V = combineRepeatedFPDivisors(N)) return V; if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (auto *N1CFP = dyn_cast(N1)) { // Compute the reciprocal 1.0 / c2. const APFloat &N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); // Only do the transform if the reciprocal is a legal fp immediate that // isn't too nasty (eg NaN, denormal, ...). if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty (!LegalOperations || // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM // backend)... we should handle this gracefully after Legalize. // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || TLI.isOperationLegal(ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT, ForCodeSize))) return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getConstantFP(Recip, DL, VT)); } // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, // it's still worthwhile to get rid of the FSQRT if possible. SDValue Sqrt, Y; if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { Sqrt = N1.getOperand(0); Y = N1.getOperand(1); } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { Sqrt = N1.getOperand(1); Y = N1.getOperand(0); } if (Sqrt.getNode()) { // If the other multiply operand is known positive, pull it into the // sqrt. That will eliminate the division if we convert to an estimate. 
if (Flags.hasAllowReassociation() && N1.hasOneUse() && N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) { SDValue A; if (Y.getOpcode() == ISD::FABS && Y.hasOneUse()) A = Y.getOperand(0); else if (Y == Sqrt.getOperand(0)) A = Y; if (A) { // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z) // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A) SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); SDValue AAZ = DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); // Estimate creation failed. Clean up speculatively created nodes. recursivelyDeleteUnusedNodes(AAZ.getNode()); } } // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. if (Options.NoInfsFPMath || Flags.hasNoInfs()) if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) return RV; } // Fold X/Sqrt(X) -> Sqrt(X) if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && (Options.UnsafeFPMath || Flags.hasAllowReassociation())) if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0)) return N1; // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) TargetLowering::NegatibleCost CostN0 = TargetLowering::NegatibleCost::Expensive; TargetLowering::NegatibleCost CostN1 = TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); SDValue NegN1 = TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); if (NegN0 && NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1); return SDValue(); } SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; // fold (frem c1, c2) -> fmod(c1,c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1})) return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; return SDValue(); } SDValue DAGCombiner::visitFSQRT(SDNode *N) { SDNodeFlags Flags = N->getFlags(); const TargetOptions &Options = DAG.getTarget().Options; // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as: // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN if (!Flags.hasApproximateFuncs() || (!Options.NoInfsFPMath && !Flags.hasNoInfs())) return SDValue(); SDValue N0 = N->getOperand(0); if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. // TODO: If this is N0/sqrt(N0), and we reach this node before trying to // transform the fdiv, we may produce a sub-optimal estimate sequence // because the reciprocal calculation may not have to filter out a // 0.0 input. 
return buildSqrtEstimate(N0, Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) /// copysign(x, fp_round(y)) -> copysign(x, y) static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { SDValue N1 = N->getOperand(1); if ((N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)) { EVT N1VT = N1->getValueType(0); EVT N1Op0VT = N1->getOperand(0).getValueType(); // Always fold no-op FP casts. if (N1VT == N1Op0VT) return true; // Do not optimize out type conversion of f128 type yet. // For some targets like x86_64, configuration is changed to keep one f128 // value in one SSE register, but instruction selection cannot handle // FCOPYSIGN on SSE registers yet. if (N1Op0VT == MVT::f128) return false; // Avoid mismatched vector operand types, for better instruction selection. if (N1Op0VT.isVector()) return false; return true; } return false; } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); // fold (fcopysign c1, c2) -> fcopysign(c1,c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1})) return C; if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) { const APFloat &V = N1C->getValueAPF(); // copysign(x, c1) -> fabs(x) iff ispos(c1) // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); } else { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); } } // copysign(fabs(x), y) -> copysign(x, y) // copysign(fneg(x), y) -> copysign(x, y) // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); } SDValue DAGCombiner::visitFPOW(SDNode *N) { ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1)); if (!ExponentC) return SDValue(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); // Try to convert x ** (1/3) into cube root. // TODO: Handle the various flavors of long double. // TODO: Since we're approximating, we don't need an exact 1/3 exponent. // Some range near 1/3 should be fine. EVT VT = N->getValueType(0); if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) || (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) { // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0. // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf. // pow(-val, 1/3) = nan; cbrt(-val) = -num. // For regular numbers, rounding may cause the results to differ. // Therefore, we require { nsz ninf nnan afn } for this transform. // TODO: We could select out the special cases if we don't have nsz/ninf. 
SDNodeFlags Flags = N->getFlags(); if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() || !Flags.hasApproximateFuncs()) return SDValue(); // Do not create a cbrt() libcall if the target does not have it, and do not // turn a pow that has lowering support into a cbrt() libcall. if (!DAG.getLibInfo().has(LibFunc_cbrt) || (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) && DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT))) return SDValue(); return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0)); } // Try to convert x ** (1/4) and x ** (3/4) into square roots. // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case. // TODO: This could be extended (using a target hook) to handle smaller // power-of-2 fractional exponents. bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25); bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75); if (ExponentIs025 || ExponentIs075) { // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0. // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN. // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0. // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN. // For regular numbers, rounding may cause the results to differ. // Therefore, we require { nsz ninf afn } for this transform. // TODO: We could select out the special cases if we don't have nsz/ninf. SDNodeFlags Flags = N->getFlags(); // We only need no signed zeros for the 0.25 case. if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() || !Flags.hasApproximateFuncs()) return SDValue(); // Don't double the number of libcalls. We are trying to inline fast code. if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT)) return SDValue(); // Assume that libcalls are the smallest code. // TODO: This restriction should probably be lifted for vectors. if (ForCodeSize) return SDValue(); // pow(X, 0.25) --> sqrt(sqrt(X)) SDLoc DL(N); SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0)); SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt); if (ExponentIs025) return SqrtSqrt; // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X)) return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt); } return SDValue(); } static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { // We only do this if the target has legal ftrunc. Otherwise, we'd likely be // replacing casts with a libcall. We also must be allowed to ignore -0.0 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer // conversions would return +0.0. // FIXME: We should be able to use node-level FMF here. // TODO: If strict math, should we use FABS (+ range check for signed cast)? 
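// Worked example (illustrative): for x = -0.25, (sint_to_fp (fp_to_sint x))
// yields +0.0 while ftrunc(-0.25) yields -0.0; for x = 2.75 both yield 2.0.
// That is why the no-signed-zeros check below is required.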
EVT VT = N->getValueType(0); if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || !DAG.getTarget().Options.NoSignedZerosFPMath) return SDValue(); // fptosi/fptoui round towards zero, so converting from FP to integer and // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X SDValue N0 = N->getOperand(0); if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && N0.getOperand(0).getValueType() == VT) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && N0.getOperand(0).getValueType() == VT) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); return SDValue(); } SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // [us]itofp(undef) = 0, because the result value is bounded. if (N0.isUndef()) return DAG.getConstantFP(0.0, SDLoc(N), VT); // fold (sint_to_fp c1) -> c1fp if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, // but UINT_TO_FP is legal on this target, try to convert. if (!hasOperation(ISD::SINT_TO_FP, OpVT) && hasOperation(ISD::UINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to UINT_TO_FP. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desirable only if SELECT_CC can be lowered. // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0) if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT)); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> // (select (setcc x, y, cc), 1.0, 0.0) if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); return DAG.getSelect(DL, VT, N0.getOperand(0), DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT)); } if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) return FTrunc; return SDValue(); } SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // [us]itofp(undef) = 0, because the result value is bounded. if (N0.isUndef()) return DAG.getConstantFP(0.0, SDLoc(N), VT); // fold (uint_to_fp c1) -> c1fp if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, // but SINT_TO_FP is legal on this target, try to convert. if (!hasOperation(ISD::UINT_TO_FP, OpVT) && hasOperation(ISD::SINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to SINT_TO_FP. 
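// (A value whose sign bit is zero is non-negative, so the signed and unsigned
// conversions produce the same result.)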
if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0) if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT)); } if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) return FTrunc; return SDValue(); } // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) return SDValue(); SDValue Src = N0.getOperand(0); EVT SrcVT = Src.getValueType(); bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; // We can safely assume the conversion won't overflow the output range, // because (for example) (uint8_t)18293.f is undefined behavior. // Since we can assume the conversion won't overflow, our decision as to // whether the input will fit in the float should depend on the minimum // of the input range and output range. // This means this is also safe for a signed input and unsigned output, since // a negative input would lead to undefined behavior. unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; unsigned OutputSize = (int)VT.getScalarSizeInBits(); unsigned ActualSize = std::min(InputSize, OutputSize); const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); // We can only fold away the float conversion if the input range can be // represented exactly in the float range. if (APFloat::semanticsPrecision(sem) >= ActualSize) { if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { unsigned ExtOp = IsInputSigned && IsOutputSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(ExtOp, SDLoc(N), VT, Src); } if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); return DAG.getBitcast(VT, Src); } return SDValue(); } SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (fp_to_sint undef) -> undef if (N0.isUndef()) return DAG.getUNDEF(VT); // fold (fp_to_sint c1fp) -> c1 if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (fp_to_uint undef) -> undef if (N0.isUndef()) return DAG.getUNDEF(VT); // fold (fp_to_uint c1fp) -> c1 if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp if (N0CFP) return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) return N0.getOperand(0); // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { const bool NIsTrunc = N->getConstantOperandVal(1) == 1; const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1; // Skip this folding if it results in an fp_round from f80 to f16. // // f80 to f16 always generates an expensive (and as yet, unimplemented) // libcall to __truncxfhf2 instead of selecting native f16 conversion // instructions from f32 or f64. Moreover, the first (value-preserving) // fp_round from f80 to either f32 or f64 may become a NOP in platforms like // x86. if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16) return SDValue(); // If the first fp_round isn't a value preserving truncation, it might // introduce a tie in the second fp_round, that wouldn't occur in the // single-step fp_round we want to fold to. // In other words, double rounding isn't the same as rounding. // Also, this is a value preserving truncation iff both fp_round's are. if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { SDLoc DL(N); return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); } } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Tmp.getNode()); return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; return SDValue(); } SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 
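// (Letting visitFP_ROUND see the pair allows its (fp_round (fp_extend x)) -> x
// fold above to remove both nodes.)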
if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND) return SDValue(); // fold (fp_extend c1fp) -> c1fp if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) if (N0.getOpcode() == ISD::FP16_TO_FP && TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, In, N0.getOperand(1)); return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1, SDLoc(N0))), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; return SDValue(); } SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); // fold ftrunc (known rounded int x) -> x // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is // likely to be generated to extract integer from a rounded floating value. switch (N0.getOpcode()) { default: break; case ISD::FRINT: case ISD::FTRUNC: case ISD::FNEARBYINT: case ISD::FFLOOR: case ISD::FCEIL: return N0; } return SDValue(); } SDValue DAGCombiner::visitFFLOOR(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); SelectionDAG::FlagInserter FlagsInserter(DAG, N); // Constant fold FNEG. if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize)) return NegN0; // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't // know it was called from a context with a nsz flag if the input fsub does // not. 
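// Concretely: with X == Y == 1.0, -(X-Y) is -(+0.0) == -0.0 but (Y-X) is
// +0.0, so the rewrite below is only done when signed zeros can be ignored.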
if (N0.getOpcode() == ISD::FSUB && (DAG.getTarget().Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), N0.getOperand(0)); } if (SDValue Cast = foldSignChangeInBitcast(N)) return Cast; return SDValue(); } SDValue DAGCombiner::visitFMinMax(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); const SDNodeFlags Flags = N->getFlags(); unsigned Opc = N->getOpcode(); bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM; SelectionDAG::FlagInserter FlagsInserter(DAG, N); // Constant fold. if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1})) return C; // Canonicalize to constant on RHS. if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) { const APFloat &AF = N1CFP->getValueAPF(); // minnum(X, nan) -> X // maxnum(X, nan) -> X // minimum(X, nan) -> nan // maximum(X, nan) -> nan if (AF.isNaN()) return PropagatesNaN ? N->getOperand(1) : N->getOperand(0); // In the following folds, inf can be replaced with the largest finite // float, if the ninf flag is set. if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) { // minnum(X, -inf) -> -inf // maxnum(X, +inf) -> +inf // minimum(X, -inf) -> -inf if nnan // maximum(X, +inf) -> +inf if nnan if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs())) return N->getOperand(1); // minnum(X, +inf) -> X if nnan // maxnum(X, -inf) -> X if nnan // minimum(X, +inf) -> X // maximum(X, -inf) -> X if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs())) return N->getOperand(0); } } return SDValue(); } SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); if (SDValue Cast = foldSignChangeInBitcast(N)) return Cast; return SDValue(); } SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are // nondeterministic jumps). if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) { return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, N1->getOperand(0), N2); } // If N is a constant we could fold this into a fallthrough or unconditional // branch. However that doesn't happen very often in normal code, because // Instcombine/SimplifyCFG should have handled the available opportunities. // If we did this folding here, it would be necessary to update the // MachineBasicBlock CFG, which is awkward. // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target. 
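  // For example (illustrative): (brcond Chain, (setcc %a, %b, setlt), %bb)
  // becomes (br_cc Chain, setlt, %a, %b, %bb).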
if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); } if (N1.hasOneUse()) { // rebuildSetCC calls visitXor which may change the Chain when there is a // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes. HandleSDNode ChainHandle(Chain); if (SDValue NewN1 = rebuildSetCC(N1)) return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, ChainHandle.getValue(), NewN1, N2); } return SDValue(); } SDValue DAGCombiner::rebuildSetCC(SDValue N) { if (N.getOpcode() == ISD::SRL || (N.getOpcode() == ISD::TRUNCATE && (N.getOperand(0).hasOneUse() && N.getOperand(0).getOpcode() == ISD::SRL))) { // Look pass the truncate. if (N.getOpcode() == ISD::TRUNCATE) N = N.getOperand(0); // Match this pattern so that we can generate simpler code: // // %a = ... // %b = and i32 %a, 2 // %c = srl i32 %b, 1 // brcond i32 %c ... // // into // // %a = ... // %b = and i32 %a, 2 // %c = setcc eq %b, 0 // brcond %c ... // // This applies only when the AND constant value has one bit set and the // SRL constant is equal to the log2 of the AND constant. The back-end is // smart enough to convert the result into a TEST/JMP sequence. SDValue Op0 = N.getOperand(0); SDValue Op1 = N.getOperand(1); if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) { SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { const APInt &AndConst = cast(AndOp1)->getAPIntValue(); if (AndConst.isPowerOf2() && cast(Op1)->getAPIntValue() == AndConst.logBase2()) { SDLoc DL(N); return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, DL, Op0.getValueType()), ISD::SETNE); } } } } // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne)) // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq)) if (N.getOpcode() == ISD::XOR) { // Because we may call this on a speculatively constructed // SimplifiedSetCC Node, we need to simplify this node first. // Ideally this should be folded into SimplifySetCC and not // here. For now, grab a handle to N so we don't lose it from // replacements interal to the visit. HandleSDNode XORHandle(N); while (N.getOpcode() == ISD::XOR) { SDValue Tmp = visitXOR(N.getNode()); // No simplification done. if (!Tmp.getNode()) break; // Returning N is form in-visit replacement that may invalidated // N. Grab value from Handle. if (Tmp.getNode() == N.getNode()) N = XORHandle.getValue(); else // Node simplified. Try simplifying again. N = Tmp; } if (N.getOpcode() != ISD::XOR) return N; SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq)) if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR && Op0.getValueType() == MVT::i1) { N = Op0; Op0 = N->getOperand(0); Op1 = N->getOperand(1); Equal = true; } EVT SetCCVT = N.getValueType(); if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); // Replace the uses of XOR with SETCC return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); } } return SDValue(); } // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N), false);
  if (Simp.getNode())
    AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, N->getOperand(0),
                       Simp.getOperand(2), Simp.getOperand(0),
                       Simp.getOperand(1), N->getOperand(4));

  return SDValue();
}

static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
                                     bool &IsLoad, bool &IsMasked, SDValue &Ptr,
                                     const TargetLowering &TLI) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    EVT VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    EVT VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
      return false;
    Ptr = ST->getBasePtr();
    IsLoad = false;
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    EVT VT = LD->getMemoryVT();
    if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
        !TLI.isIndexedMaskedLoadLegal(Dec, VT))
      return false;
    Ptr = LD->getBasePtr();
    IsMasked = true;
  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    EVT VT = ST->getMemoryVT();
    if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
        !TLI.isIndexedMaskedStoreLegal(Dec, VT))
      return false;
    Ptr = ST->getBasePtr();
    IsLoad = false;
    IsMasked = true;
  } else {
    return false;
  }
  return true;
}

/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad,
                                IsMasked, Ptr, TLI))
    return false;

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
if (isNullConstant(Offset)) return false; // Try turning it into a pre-indexed load / store except when: // 1) The new base ptr is a frame index. // 2) If N is a store and the new base ptr is either the same as or is a // predecessor of the value being stored. // 3) Another use of old base ptr is a predecessor of N. If ptr is folded // that would create a cycle. // 4) All uses are load / store ops that use it as old base ptr. // Check #1. Preinc'ing a frame index would require copying the stack pointer // (plus the implicit offset) to a register to preinc anyway. if (isa(BasePtr) || isa(BasePtr)) return false; // Check #2. if (!IsLoad) { SDValue Val = IsMasked ? cast(N)->getValue() : cast(N)->getValue(); // Would require a copy. if (Val == BasePtr) return false; // Would create a cycle. if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode())) return false; } // Caches for hasPredecessorHelper. SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(N); // If the offset is a constant, there may be other adds of constants that // can be folded with this one. We should do this to avoid having to keep // a copy of the original base pointer. SmallVector OtherUses; if (isa(Offset)) for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { SDUse &Use = UI.getUse(); // Skip the use that is Ptr and uses of other results from BasePtr's // node (important for nodes that return multiple results). if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)) continue; if (Use.getUser()->getOpcode() != ISD::ADD && Use.getUser()->getOpcode() != ISD::SUB) { OtherUses.clear(); break; } SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); if (!isa(Op1)) { OtherUses.clear(); break; } // FIXME: In some cases, we can be smarter about this. if (Op1.getValueType() != Offset.getValueType()) { OtherUses.clear(); break; } OtherUses.push_back(Use.getUser()); } if (Swapped) std::swap(BasePtr, Offset); // Now check for #3 and #4. bool RealUse = false; for (SDNode *Use : Ptr->uses()) { if (Use == N) continue; if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) return false; // If Ptr may be folded in addressing mode of other use, then it's // not profitable to do this transformation. if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) RealUse = true; } if (!RealUse) return false; SDValue Result; if (!IsMasked) { if (IsLoad) Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); else Result = DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); } else { if (IsLoad) Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); else Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); } ++PreIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); } // Finally, since the node is now dead, remove it from the graph. 
deleteAndRecombine(N); if (Swapped) std::swap(BasePtr, Offset); // Replace other uses of BasePtr that can be updated to use Ptr for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { unsigned OffsetIdx = 1; if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) OffsetIdx = 0; assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() && "Expected BasePtr operand"); // We need to replace ptr0 in the following expression: // x0 * offset0 + y0 * ptr0 = t0 // knowing that // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the // indexed load/store and the expression that needs to be re-written. // // Therefore, we have: // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 auto *CN = cast(OtherUses[i]->getOperand(OffsetIdx)); const APInt &Offset0 = CN->getAPIntValue(); const APInt &Offset1 = cast(Offset)->getAPIntValue(); int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; APInt CNV = Offset0; if (X0 < 0) CNV = -CNV; if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; else CNV = CNV - Offset1; SDLoc DL(OtherUses[i]); // We can now generate the new expression. SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, DL, OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); deleteAndRecombine(OtherUses[i]); } // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Ptr.getNode()); AddToWorklist(Result.getNode()); return true; } static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI) { if (PtrUse == N || (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB)) return false; if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG)) return false; // Don't create a indexed load / store with zero offset. if (isNullConstant(Offset)) return false; if (isa(BasePtr) || isa(BasePtr)) return false; SmallPtrSet Visited; for (SDNode *Use : BasePtr->uses()) { if (Use == Ptr.getNode()) continue; // No if there's a later user which could perform the index instead. if (isa(Use)) { bool IsLoad = true; bool IsMasked = false; SDValue OtherPtr; if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, OtherPtr, TLI)) { SmallVector Worklist; Worklist.push_back(Use); if (SDNode::hasPredecessorHelper(N, Visited, Worklist)) return false; } } // If all the uses are load / store addresses, then don't do the // transformation. 
    if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
      for (SDNode *UseUse : Use->uses())
        if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
          return false;
    }
  }
  return true;
}

static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
                                         bool &IsMasked, SDValue &Ptr,
                                         SDValue &BasePtr, SDValue &Offset,
                                         ISD::MemIndexedMode &AM,
                                         SelectionDAG &DAG,
                                         const TargetLowering &TLI) {
  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
                                IsMasked, Ptr, TLI) ||
      Ptr->hasOneUse())
    return nullptr;

  // Try turning it into a post-indexed load / store except when
  // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mode).
  // 2) Op must be independent of N, i.e. Op is neither a predecessor
  //    nor a successor of N. Otherwise, if Op is folded that would
  //    create a cycle.
  for (SDNode *Op : Ptr->uses()) {
    // Check for #1.
    if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
      continue;

    // Check for #2.
    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 8> Worklist;
    // Ptr is predecessor to both N and Op.
    Visited.insert(Ptr.getNode());
    Worklist.push_back(N);
    Worklist.push_back(Op);
    if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
        !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
      return Op;
  }
  return nullptr;
}

/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all of its uses are
/// redirected to the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
                                         Offset, AM, DAG, TLI);
  if (!Op)
    return false;

  SDValue Result;
  if (!IsMasked)
    Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM)
                    : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr,
                                          Offset, AM);
  else
    Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
                                               BasePtr, Offset, AM)
                    : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
                                                BasePtr, Offset, AM);
  ++PostIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (IsLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Replace the uses of Use with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                Result.getValue(IsLoad ? 1 : 0));
  deleteAndRecombine(Op);
  return true;
}

/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
assert((Inc.getOpcode() != ISD::TargetConstant || !cast(Inc)->isOpaque()) && "Cannot split out indexing using opaque target constants"); if (Inc.getOpcode() == ISD::TargetConstant) { ConstantSDNode *ConstInc = cast(Inc); Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), ConstInc->getValueType(0)); } unsigned Opc = (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); } static inline ElementCount numVectorEltsOrZero(EVT T) { return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0); } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { Val = ST->getValue(); EVT STType = Val.getValueType(); EVT STMemType = ST->getMemoryVT(); if (STType == STMemType) return true; if (isTypeLegal(STMemType)) return false; // fail. if (STType.isFloatingPoint() && STMemType.isFloatingPoint() && TLI.isOperationLegal(ISD::FTRUNC, STMemType)) { Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val); return true; } if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) && STType.isInteger() && STMemType.isInteger()) { Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val); return true; } if (STType.getSizeInBits() == STMemType.getSizeInBits()) { Val = DAG.getBitcast(STMemType, Val); return true; } return false; // fail. } bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { EVT LDMemType = LD->getMemoryVT(); EVT LDType = LD->getValueType(0); assert(Val.getValueType() == LDMemType && "Attempting to extend value of non-matching type"); if (LDType == LDMemType) return true; if (LDMemType.isInteger() && LDType.isInteger()) { switch (LD->getExtensionType()) { case ISD::NON_EXTLOAD: Val = DAG.getBitcast(LDType, Val); return true; case ISD::EXTLOAD: Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val); return true; case ISD::SEXTLOAD: Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val); return true; case ISD::ZEXTLOAD: Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val); return true; } } return false; } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { if (OptLevel == CodeGenOpt::None || !LD->isSimple()) return SDValue(); SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast(Chain.getNode()); // TODO: Relax this restriction for unordered atomics (see D66309) if (!ST || !ST->isSimple()) return SDValue(); EVT LDType = LD->getValueType(0); EVT LDMemType = LD->getMemoryVT(); EVT STMemType = ST->getMemoryVT(); EVT STType = ST->getValue().getValueType(); // There are two cases to consider here: // 1. The store is fixed width and the load is scalable. In this case we // don't know at compile time if the store completely envelops the load // so we abandon the optimisation. // 2. The store is scalable and the load is fixed width. We could // potentially support a limited number of cases here, but there has been // no cost-benefit analysis to prove it's worth it. bool LdStScalable = LDMemType.isScalableVector(); if (LdStScalable != STMemType.isScalableVector()) return SDValue(); // If we are dealing with scalable vectors on a big endian platform the // calculation of offsets below becomes trickier, since we do not know at // compile time the absolute size of the vector. Until we've done more // analysis on big-endian platforms it seems better to bail out for now. 
if (LdStScalable && DAG.getDataLayout().isBigEndian()) return SDValue(); BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG); BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG); int64_t Offset; if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset)) return SDValue(); // Normalize for Endianness. After this Offset=0 will denote that the least // significant bit in the loaded value maps to the least significant bit in // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. if (DAG.getDataLayout().isBigEndian()) Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() - (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) / 8 - Offset; // Check that the stored value cover all bits that are loaded. bool STCoversLD; TypeSize LdMemSize = LDMemType.getSizeInBits(); TypeSize StMemSize = STMemType.getSizeInBits(); if (LdStScalable) STCoversLD = (Offset == 0) && LdMemSize == StMemSize; else STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <= StMemSize.getFixedSize()); auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { if (LD->isIndexed()) { // Cannot handle opaque target constants and we must respect the user's // request not to split indexes from loads. if (!canSplitIdx(LD)) return SDValue(); SDValue Idx = SplitIndexingFromLoad(LD); SDValue Ops[] = {Val, Idx, Chain}; return CombineTo(LD, Ops, 3); } return CombineTo(LD, Val, Chain); }; if (!STCoversLD) return SDValue(); // Memory as copy space (potentially masked). if (Offset == 0 && LDType == STType && STMemType == LDMemType) { // Simple case: Direct non-truncating forwarding if (LDType.getSizeInBits() == LdMemSize) return ReplaceLd(LD, ST->getValue(), Chain); // Can we model the truncate and extension with an and mask? if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() && !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) { // Mask to size of LDMemType auto Mask = DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(), StMemSize.getFixedSize()), SDLoc(ST), STType); auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask); return ReplaceLd(LD, Val, Chain); } } // TODO: Deal with nonzero offset. if (LD->getBasePtr().isUndef() || Offset != 0) return SDValue(); // Model necessary truncations / extenstions. SDValue Val; // Truncate Value To Stored Memory Size. do { if (!getTruncatedStoreValue(ST, Val)) continue; if (!isTypeLegal(LDMemType)) continue; if (STMemType != LDMemType) { // TODO: Support vectors? This requires extract_subvector/bitcast. if (!STMemType.isVector() && !LDMemType.isVector() && STMemType.isInteger() && LDMemType.isInteger()) Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val); else continue; } if (!extendLoadedValueToExtension(LD, Val)) continue; return ReplaceLd(LD, Val, Chain); } while (false); // On failure, cleanup dead nodes we may have created. if (Val->use_empty()) deleteAndRecombine(Val.getNode()); return SDValue(); } SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast(N); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). // TODO: Allow this for unordered atomics (see D66309) if (LD->isSimple()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. 
if (!N->hasAnyUseOfValue(0)) { // It's not safe to use the two value CombineTo variant here. e.g. // v1, chain2 = load chain1, loc // v2, chain3 = load chain2, loc // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); dbgs() << "\nWith chain: "; Chain.dump(&DAG); dbgs() << "\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); AddUsersToWorklist(Chain.getNode()); if (N->use_empty()) deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); // If this load has an opaque TargetConstant offset, then we cannot split // the indexing into an add/sub directly (that TargetConstant may not be // valid for a different type of node, and we cannot convert an opaque // target constant into a regular constant). bool CanSplitIdx = canSplitIdx(LD); if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); SDValue Index; if (N->hasAnyUseOfValue(1) && CanSplitIdx) { Index = SplitIndexingFromLoad(LD); // Try to fold the base pointer arithmetic into subsequent loads and // stores. AddUsersToWorklist(N); } else Index = DAG.getUNDEF(N->getValueType(1)); LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; Undef.dump(&DAG); dbgs() << " and 2 other values\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } // If this load is directly stored, replace the load value with the stored // value. if (auto V = ForwardStoreValueToDirectLoad(LD)) return V; // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) { if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { if (*Alignment > LD->getAlign() && isAligned(*Alignment, LD->getSrcValueOffset())) { SDValue NewLoad = DAG.getExtLoad( LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), *Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); // NewLoad will always be N as we are only refining the alignment assert(NewLoad.getNode() == N); (void)NewLoad; } } } if (LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(LD, Chain); // If there is a better chain. if (Chain != BetterChain) { SDValue ReplLoad; // Replace the chain to void dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), BetterChain, Ptr, LD->getMemOperand()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), LD->getValueType(0), BetterChain, Ptr, LD->getMemoryVT(), LD->getMemOperand()); } // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); // Replace uses with load result and token factor return CombineTo(N, ReplLoad.getValue(0), Token); } } // Try transforming N to an indexed load. 
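  // (Illustrative example: on targets with pre-indexed forms, a load whose
  // address is (add %p, 4) can become a single pre-indexed load that also
  // produces the updated pointer; the exact form is target dependent.)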
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); // Try to slice up N to more direct loads if the slices are mapped to // different register banks or pairing can take place. if (SliceUpLoad(N)) return SDValue(N, 0); return SDValue(); } namespace { /// Helper structure used to slice a load in smaller loads. /// Basically a slice is obtained from the following sequence: /// Origin = load Ty1, Base /// Shift = srl Ty1 Origin, CstTy Amount /// Inst = trunc Shift to Ty2 /// /// Then, it will be rewritten into: /// Slice = load SliceTy, Base + SliceOffset /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 /// /// SliceTy is deduced from the number of bits that are actually used to /// build Inst. struct LoadedSlice { /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. bool ForCodeSize = false; /// Various cost. unsigned Loads = 0; unsigned Truncates = 0; unsigned CrossRegisterBanksCopies = 0; unsigned ZExts = 0; unsigned Shift = 0; explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {} /// Get the cost of one isolated slice. Cost(const LoadedSlice &LS, bool ForCodeSize) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); if (TruncType != LoadedType && !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) ZExts = 1; } /// Account for slicing gain in the current cost. /// Slicing provide a few gains like removing a shift or a /// truncate. This method allows to grow the cost of the original /// load with the gain from this slice. void addSliceGain(const LoadedSlice &LS) { // Each slice saves a truncate. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), LS.Inst->getValueType(0))) ++Truncates; // If there is a shift amount, this slice gets rid of it. if (LS.Shift) ++Shift; // If this slice can merge a cross register bank copy, account for it. if (LS.canMergeExpensiveCrossRegisterBankCopy()) ++CrossRegisterBanksCopies; } Cost &operator+=(const Cost &RHS) { Loads += RHS.Loads; Truncates += RHS.Truncates; CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; ZExts += RHS.ZExts; Shift += RHS.Shift; return *this; } bool operator==(const Cost &RHS) const { return Loads == RHS.Loads && Truncates == RHS.Truncates && CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && ZExts == RHS.ZExts && Shift == RHS.Shift; } bool operator!=(const Cost &RHS) const { return !(*this == RHS); } bool operator<(const Cost &RHS) const { // Assume cross register banks copies are as expensive as loads. // FIXME: Do we want some more target hooks? unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; // Unless we are optimizing for code size, consider the // expensive operation first. if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) return ExpensiveOpsLHS < ExpensiveOpsRHS; return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); } bool operator>(const Cost &RHS) const { return RHS < *this; } bool operator<=(const Cost &RHS) const { return !(RHS < *this); } bool operator>=(const Cost &RHS) const { return !(*this < RHS); } }; // The last instruction that represent the slice. This should be a // truncate instruction. SDNode *Inst; // The original load instruction. 
LoadSDNode *Origin; // The right shift amount in bits from the original load. unsigned Shift; // The DAG from which Origin came from. // This is used to get some contextual information about legal types, etc. SelectionDAG *DAG; LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} /// Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. APInt getUsedBits() const { // Reproduce the trunc(lshr) sequence: // - Start from the truncated value. // - Zero extend to the desired bit width. // - Shift left. assert(Origin && "No original load to compare against."); unsigned BitWidth = Origin->getValueSizeInBits(0); assert(Inst && "This slice is not bound to an instruction"); assert(Inst->getValueSizeInBits(0) <= BitWidth && "Extracted slice is bigger than the whole type!"); APInt UsedBits(Inst->getValueSizeInBits(0), 0); UsedBits.setAllBits(); UsedBits = UsedBits.zext(BitWidth); UsedBits <<= Shift; return UsedBits; } /// Get the size of the slice to be loaded in bytes. unsigned getLoadedSize() const { unsigned SliceSize = getUsedBits().countPopulation(); assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); return SliceSize / 8; } /// Get the type that will be loaded for this slice. /// Note: This may not be the final type for the slice. EVT getLoadedType() const { assert(DAG && "Missing context"); LLVMContext &Ctxt = *DAG->getContext(); return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); } /// Get the alignment of the load used for this slice. Align getAlign() const { Align Alignment = Origin->getAlign(); uint64_t Offset = getOffsetFromBase(); if (Offset != 0) Alignment = commonAlignment(Alignment, Alignment.value() + Offset); return Alignment; } /// Check if this slice can be rewritten with legal operations. bool isLegal() const { // An invalid slice is not legal. if (!Origin || !Inst || !DAG) return false; // Offsets are for indexed load only, we do not handle that. if (!Origin->getOffset().isUndef()) return false; const TargetLowering &TLI = DAG->getTargetLoweringInfo(); // Check that the type is legal. EVT SliceType = getLoadedType(); if (!TLI.isTypeLegal(SliceType)) return false; // Check that the load is legal for this type. if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) return false; // Check that the offset can be computed. // 1. Check its type. EVT PtrType = Origin->getBasePtr().getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) return false; // 2. Check that it fits in the immediate. if (!TLI.isLegalAddImmediate(getOffsetFromBase())) return false; // 3. Check that the computation is legal. if (!TLI.isOperationLegal(ISD::ADD, PtrType)) return false; // Check that the zext is legal if it needs one. EVT TruncateType = Inst->getValueType(0); if (TruncateType != SliceType && !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) return false; return true; } /// Get the offset in bytes of this slice in the original chunk of /// bits. /// \pre DAG != nullptr. 
uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); bool IsBigEndian = DAG->getDataLayout().isBigEndian(); assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); uint64_t Offset = Shift / 8; unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; assert(!(Origin->getValueSizeInBits(0) & 0x7) && "The size of the original loaded type is not a multiple of a" " byte."); // If Offset is bigger than TySizeInBytes, it means we are loading all // zeros. This should have been optimized before in the process. assert(TySizeInBytes > Offset && "Invalid shift amount for given loaded size"); if (IsBigEndian) Offset = TySizeInBytes - Offset - getLoadedSize(); return Offset; } /// Generate the sequence of instructions to load the slice /// represented by this object and redirect the uses of this slice to /// this new sequence of instructions. /// \pre this->Inst && this->Origin are valid Instructions and this /// object passed the legal check: LoadedSlice::isLegal returned true. /// \return The last instruction of the sequence used to load the slice. SDValue loadSlice() const { assert(Inst && Origin && "Unable to replace a non-existing slice."); const SDValue &OldBaseAddr = Origin->getBasePtr(); SDValue BaseAddr = OldBaseAddr; // Get the offset in that chunk of bytes w.r.t. the endianness. int64_t Offset = static_cast(getOffsetFromBase()); assert(Offset >= 0 && "Offset too big to fit in int64_t!"); if (Offset) { // BaseAddr = BaseAddr + Offset. EVT ArithType = BaseAddr.getValueType(); SDLoc DL(Origin); BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, DAG->getConstant(Offset, DL, ArithType)); } // Create the type of the loaded slice according to its size. EVT SliceType = getLoadedType(); // Create the load for the slice. SDValue LastInst = DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, Origin->getPointerInfo().getWithOffset(Offset), getAlign(), Origin->getMemOperand()->getFlags()); // If the final type is not the same as the loaded type, this means that // we have to pad with zero. Create a zero extend for that. EVT FinalType = Inst->getValueType(0); if (SliceType != FinalType) LastInst = DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); return LastInst; } /// Check if this slice can be merged with an expensive cross register /// bank copy. E.g., /// i = load i32 /// f = bitcast i32 i to float bool canMergeExpensiveCrossRegisterBankCopy() const { if (!Inst || !Inst->hasOneUse()) return false; SDNode *Use = *Inst->use_begin(); if (Use->getOpcode() != ISD::BITCAST) return false; assert(DAG && "Missing context"); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); EVT ResVT = Use->getValueType(0); const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent()); const TargetRegisterClass *ArgRC = TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(), Use->getOperand(0)->isDivergent()); if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) return false; // At this point, we know that we perform a cross-register-bank copy. // Check if it is expensive. const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); // Assume bitcasts are cheap, unless both register classes do not // explicitly share a common sub class. if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) return false; // Check if it will be merged with the load. // 1. Check the alignment / fast memory access constraint. 
    bool IsFast = false;
    if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(),
                                ResVT, Origin->getAddressSpace(), getAlign(),
                                Origin->getMemOperand()->getFlags(),
                                &IsFast) ||
        !IsFast)
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};

} // end anonymous namespace

/// Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnes())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnes();
}

/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}

/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    Align RequiredAlignment;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (First->getAlign() < RequiredAlignment)
      continue;

    // Check that both loads are next to each other in memory.
if (!areSlicesNextToEachOther(*First, *Second)) continue; assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); --GlobalLSCost.Loads; // Move to the next pair. Second = nullptr; } } /// Check the profitability of all involved LoadedSlice. /// Currently, it is considered profitable if there is exactly two /// involved slices (1) which are (2) next to each other in memory, and /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). /// /// Note: The order of the elements in \p LoadedSlices may be modified, but not /// the elements themselves. /// /// FIXME: When the cost model will be mature enough, we can relax /// constraints (1) and (2). static bool isSlicingProfitable(SmallVectorImpl &LoadedSlices, const APInt &UsedBits, bool ForCodeSize) { unsigned NumberOfSlices = LoadedSlices.size(); if (StressLoadSlicing) return NumberOfSlices > 1; // Check (1). if (NumberOfSlices != 2) return false; // Check (2). if (!areUsedBitsDense(UsedBits)) return false; // Check (3). LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); // The original code has one big load. OrigCost.Loads = 1; for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { const LoadedSlice &LS = LoadedSlices[CurrSlice]; // Accumulate the cost of all the slices. LoadedSlice::Cost SliceCost(LS, ForCodeSize); GlobalSlicingCost += SliceCost; // Account as cost in the original configuration the gain obtained // with the current slices. OrigCost.addSliceGain(LS); } // If the target supports paired load, adjust the cost accordingly. adjustCostForPairing(LoadedSlices, GlobalSlicingCost); return OrigCost > GlobalSlicingCost; } /// If the given load, \p LI, is used only by trunc or trunc(lshr) /// operations, split it in the various pieces being extracted. /// /// This sort of thing is introduced by SROA. /// This slicing takes care not to insert overlapping loads. /// \pre LI is a simple load (i.e., not an atomic or volatile load). bool DAGCombiner::SliceUpLoad(SDNode *N) { if (Level < AfterLegalizeDAG) return false; LoadSDNode *LD = cast(N); if (!LD->isSimple() || !ISD::isNormalLoad(LD) || !LD->getValueType(0).isInteger()) return false; // The algorithm to split up a load of a scalable vector into individual // elements currently requires knowing the length of the loaded type, // so will need adjusting to work on scalable vectors. if (LD->getValueType(0).isScalableVector()) return false; // Keep track of already used bits to detect overlapping values. // In that case, we will just abort the transformation. APInt UsedBits(LD->getValueSizeInBits(0), 0); SmallVector LoadedSlices; // Check if this load is used as several smaller chunks of bits. // Basically, look for uses in trunc or trunc(lshr) and record a new chain // of computation for each trunc. for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); UI != UIEnd; ++UI) { // Skip the uses of the chain. if (UI.getUse().getResNo() != 0) continue; SDNode *User = *UI; unsigned Shift = 0; // Check if this is a trunc(lshr). if (User->getOpcode() == ISD::SRL && User->hasOneUse() && isa(User->getOperand(1))) { Shift = User->getConstantOperandVal(1); User = *User->use_begin(); } // At this point, User is a Truncate, iff we encountered, trunc or // trunc(lshr). if (User->getOpcode() != ISD::TRUNCATE) return false; // The width of the type must be a power of 2 and greater than 8-bits. // Otherwise the load cannot be represented in LLVM IR. 
// Moreover, if we shifted with a non-8-bits multiple, the slice // will be across several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) return false; // Build the slice for this chain of computations. LoadedSlice LS(User, LD, Shift, &DAG); APInt CurrentUsedBits = LS.getUsedBits(); // Check if this slice overlaps with another. if ((CurrentUsedBits & UsedBits) != 0) return false; // Update the bits used globally. UsedBits |= CurrentUsedBits; // Check if the new slice would be legal. if (!LS.isLegal()) return false; // Record the slice. LoadedSlices.push_back(LS); } // Abort slicing if it does not seem to be profitable. if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) return false; ++SlicedLoads; // Rewrite each chain to use an independent load. // By construction, each chain can be represented by a unique load. // Prepare the argument for the new token factor for all the slices. SmallVector ArgChains; for (const LoadedSlice &LS : LoadedSlices) { SDValue SliceInst = LS.loadSlice(); CombineTo(LS.Inst, SliceInst, true); if (SliceInst.getOpcode() != ISD::LOAD) SliceInst = SliceInst.getOperand(0); assert(SliceInst->getOpcode() == ISD::LOAD && "It takes more than a zext to get to the loaded slice!!"); ArgChains.push_back(SliceInst.getValue(1)); } SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, ArgChains); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); AddToWorklist(Chain.getNode()); return true; } /// Check to see if V is (and load (ptr), imm), where the load is having /// specific bytes cleared out. If so, return the byte size being masked out /// and the shift amount. static std::pair CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { std::pair Result(0, 0); // Check for the structure we're looking for. if (V->getOpcode() != ISD::AND || !isa(V->getOperand(1)) || !ISD::isNormalLoad(V->getOperand(0).getNode())) return Result; // Check the chain and pointer. LoadSDNode *LD = cast(V->getOperand(0)); if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. // This only handles simple types. if (V.getValueType() != MVT::i16 && V.getValueType() != MVT::i32 && V.getValueType() != MVT::i64) return Result; // Check the constant mask. Invert it so that the bits being masked out are // 0 and the bits being kept are 1. Use getSExtValue so that leading bits // follow the sign bit for uniformity. uint64_t NotMask = ~cast(V->getOperand(1))->getSExtValue(); unsigned NotMaskLZ = countLeadingZeros(NotMask); if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. unsigned NotMaskTZ = countTrailingZeros(NotMask); if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. if (NotMaskLZ == 64) return Result; // All zero mask. // See if we have a continuous run of bits. If so, we have 0*1+0* if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) return Result; // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. if (V.getValueType() != MVT::i64 && NotMaskLZ) NotMaskLZ -= 64-V.getValueSizeInBits(); unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; switch (MaskedBytes) { case 1: case 2: case 4: break; default: return Result; // All one mask, or 5-byte mask. } // Verify that the first bit starts at a multiple of mask so that the access // is aligned the same as the access width. 
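  // For example (illustrative): an i32 value masked with 0xFFFF00FF clears
  // only byte 1, giving MaskedBytes == 1 and a byte offset of 1, which
  // satisfies this requirement.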
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes)
    return Result;

  // For narrowing to be valid, it must be the case that the load is the
  // memory operation immediately preceding the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
    if (!LD->isOperandOf(Chain.getNode()))
      return Result;
  } else
    return Result; // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}

/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this. If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask))
    return SDValue();

  // Check that it is legal on the target to do this. It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization. If the source type is legal, but the store type isn't, see
  // if we can use a truncating store.
  MVT VT = MVT::getIntegerVT(NumBytes * 8);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  bool UseTruncStore;
  if (DC->isTypeLegal(VT))
    UseTruncStore = false;
  else if (TLI.isTypeLegal(IVal.getValueType()) &&
           TLI.isTruncStoreLegal(IVal.getValueType(), VT))
    UseTruncStore = true;
  else
    return SDValue();
  // Check that the target doesn't think this is a bad idea.
  if (St->getMemOperand() &&
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                              *St->getMemOperand()))
    return SDValue();

  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                       DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
  }

  ++OpsNarrowed;
  if (UseTruncStore)
    return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
                             St->getPointerInfo().getWithOffset(StOffset),
                             VT, St->getOriginalAlign());

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset),
                St->getOriginalAlign());
}

/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast(N); if (!ST->isSimple()) return SDValue(); SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); EVT VT = Value.getValueType(); if (ST->isTruncatingStore() || VT.isVector()) return SDValue(); unsigned Opc = Value.getOpcode(); if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || !Value.hasOneUse()) return SDValue(); // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst // is a byte mask indicating a consecutive number of bytes, check to see if // Y is known to provide just those bytes. If so, we try to replace the // load + replace + store sequence with a single (narrower) store, which makes // the load dead. if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) { std::pair MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) return NewST; // Or is commutative, so try swapping X and Y. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(0), ST,this)) return NewST; } if (!EnableReduceLoadOpStoreWidth) return SDValue(); if (Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); SDValue N0 = Value.getOperand(0); if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && Chain == SDValue(N0.getNode(), 1)) { LoadSDNode *LD = cast(N0); if (LD->getBasePtr() != Ptr || LD->getPointerInfo().getAddrSpace() != ST->getPointerInfo().getAddrSpace()) return SDValue(); // Find the type to narrow it the load / op / store to. SDValue N1 = Value.getOperand(1); unsigned BitWidth = N1.getValueSizeInBits(); APInt Imm = cast(N1)->getAPIntValue(); if (Opc == ISD::AND) Imm ^= APInt::getAllOnes(BitWidth); if (Imm == 0 || Imm.isAllOnes()) return SDValue(); unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. while (NewBW < BitWidth && (NewVT.getStoreSizeInBits() != NewBW || !TLI.isOperationLegalOrCustom(Opc, NewVT) || !TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); // If the lsb changed does not start at the type bitwidth boundary, // start at the previous one. if (ShAmt % NewBW) ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, std::min(BitWidth, ShAmt + NewBW)); if ((Imm & Mask) == Imm) { APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); if (Opc == ISD::AND) NewImm ^= APInt::getAllOnes(NewBW); uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. 
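      // (Worked example, illustrative: narrowing a 32-bit access to 8 bits
      // with ShAmt == 8 gives PtrOff == 1 on little-endian targets and
      // (32 + 7 - 8) / 8 - 1 == 2 on big-endian targets.)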
if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; bool IsFast = false; Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, LD->getAddressSpace(), NewAlign, LD->getMemOperand()->getFlags(), &IsFast) || !IsFast) return SDValue(); SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD)); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, SDLoc(Value), NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); AddToWorklist(NewPtr.getNode()); AddToWorklist(NewLD.getNode()); AddToWorklist(NewVal.getNode()); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); ++OpsNarrowed; return NewST; } } return SDValue(); } /// For a given floating point load / store pair, if the load value isn't used /// by any other operations, then consider transforming the pair to integer /// load / store operations if the target deems the transformation profitable. SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { StoreSDNode *ST = cast(N); SDValue Value = ST->getValue(); if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && Value.hasOneUse()) { LoadSDNode *LD = cast(Value); EVT VT = LD->getMemoryVT(); if (!VT.isFloatingPoint() || VT != ST->getMemoryVT() || LD->isNonTemporal() || ST->isNonTemporal() || LD->getPointerInfo().getAddrSpace() != 0 || ST->getPointerInfo().getAddrSpace() != 0) return SDValue(); TypeSize VTSize = VT.getSizeInBits(); // We don't know the size of scalable types at compile time so we cannot // create an integer of the equivalent size. if (VTSize.isScalable()) return SDValue(); bool FastLD = false, FastST = false; EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) || !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, *LD->getMemOperand(), &FastLD) || !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, *ST->getMemOperand(), &FastST) || !FastLD || !FastST) return SDValue(); SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign()); SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), ST->getPointerInfo(), ST->getAlign()); AddToWorklist(NewLD.getNode()); AddToWorklist(NewST.getNode()); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); ++LdStFP2Int; return NewST; } return SDValue(); } // This is a helper function for visitMUL to check the profitability // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). // MulNode is the original multiply, AddNode is (add x, c1), // and ConstNode is c2. // // If the (add x, c1) has multiple uses, we could increase // the number of adds if we make this transformation. // It would only be worth doing this if we can remove a // multiply in the process. Check for that here. 
// To illustrate: // (A + c1) * c3 // (A + c2) * c3 // We're checking for cases where we have common "c3 * A" expressions. bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, SDValue ConstNode) { APInt Val; // If the add only has one use, and the target thinks the folding is // profitable or does not lead to worse code, this would be OK to do. if (AddNode->hasOneUse() && TLI.isMulAddWithConstProfitable(AddNode, ConstNode)) return true; // Walk all the users of the constant with which we're multiplying. for (SDNode *Use : ConstNode->uses()) { if (Use == MulNode) // This use is the one we're on right now. Skip it. continue; if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. SDNode *OtherOp; SDNode *MulVar = AddNode.getOperand(0).getNode(); // OtherOp is what we're multiplying against the constant. if (Use->getOperand(0) == ConstNode) OtherOp = Use->getOperand(1).getNode(); else OtherOp = Use->getOperand(0).getNode(); // Check to see if multiply is with the same operand of our "add". // // ConstNode = CONST // Use = ConstNode * A <-- visiting Use. OtherOp is A. // ... // AddNode = (A + c1) <-- MulVar is A. // = AddNode * ConstNode <-- current visiting instruction. // // If we make this transformation, we will have a common // multiply (ConstNode * A) that we can save. if (OtherOp == MulVar) return true; // Now check to see if a future expansion will give us a common // multiply. // // ConstNode = CONST // AddNode = (A + c1) // ... = AddNode * ConstNode <-- current visiting instruction. // ... // OtherOp = (A + c2) // Use = OtherOp * ConstNode <-- visiting Use. // // If we make this transformation, we will have a common // multiply (CONST * A) after we also do the same transformation // to the "t2" instruction. if (OtherOp->getOpcode() == ISD::ADD && DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && OtherOp->getOperand(0).getNode() == MulVar) return true; } } // Didn't find a case where this would be profitable. return false; } SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl &StoreNodes, unsigned NumStores) { SmallVector Chains; SmallPtrSet Visited; SDLoc StoreDL(StoreNodes[0].MemNode); for (unsigned i = 0; i < NumStores; ++i) { Visited.insert(StoreNodes[i].MemNode); } // don't include nodes that are children or repeated nodes. for (unsigned i = 0; i < NumStores; ++i) { if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second) Chains.push_back(StoreNodes[i].MemNode->getChain()); } assert(Chains.size() > 0 && "Chain should have generated a chain"); return DAG.getTokenFactor(StoreDL, Chains); } bool DAGCombiner::mergeStoresOfConstantsOrVecElts( SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc) { // Make sure we have something to merge. if (NumStores < 2) return false; assert((!UseTrunc || !UseVector) && "This optimization cannot emit a vector truncating store"); // The latest Node in the DAG. SDLoc DL(StoreNodes[0].MemNode); TypeSize ElementSizeBits = MemVT.getStoreSizeInBits(); unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; Optional Flags; AAMDNodes AAInfo; for (unsigned I = 0; I != NumStores; ++I) { StoreSDNode *St = cast(StoreNodes[I].MemNode); if (!Flags) { Flags = St->getMemOperand()->getFlags(); AAInfo = St->getAAInfo(); continue; } // Skip merging if there's an inconsistent flag. 
if (Flags != St->getMemOperand()->getFlags()) return false; // Concatenate AA metadata. AAInfo = AAInfo.concat(St->getAAInfo()); } EVT StoreTy; if (UseVector) { unsigned Elts = NumStores * NumMemElts; // Get the type for the merged vector store. StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); } else StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); SDValue StoredVal; if (UseVector) { if (IsConstantSrc) { SmallVector BuildVector; for (unsigned I = 0; I != NumStores; ++I) { StoreSDNode *St = cast(StoreNodes[I].MemNode); SDValue Val = St->getValue(); // If constant is of the wrong type, convert it now. if (MemVT != Val.getValueType()) { Val = peekThroughBitcasts(Val); // Deal with constants of wrong size. if (ElementSizeBits != Val.getValueSizeInBits()) { EVT IntMemVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); if (isa(Val)) { // Not clear how to truncate FP values. return false; } if (auto *C = dyn_cast(Val)) Val = DAG.getConstant(C->getAPIntValue() .zextOrTrunc(Val.getValueSizeInBits()) .zextOrTrunc(ElementSizeBits), SDLoc(C), IntMemVT); } // Make sure correctly size type is the correct type. Val = DAG.getBitcast(MemVT, Val); } BuildVector.push_back(Val); } StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, StoreTy, BuildVector); } else { SmallVector Ops; for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); SDValue Val = peekThroughBitcasts(St->getValue()); // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of // type MemVT. If the underlying value is not the correct // type, but it is an extraction of an appropriate vector we // can recast Val to be of the correct type. This may require // converting between EXTRACT_VECTOR_ELT and // EXTRACT_SUBVECTOR. if ((MemVT != Val.getValueType()) && (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) { EVT MemVTScalarTy = MemVT.getScalarType(); // We may need to add a bitcast here to get types to line up. if (MemVTScalarTy != Val.getValueType().getScalarType()) { Val = DAG.getBitcast(MemVT, Val); } else { unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT; SDValue Vec = Val.getOperand(0); SDValue Idx = Val.getOperand(1); Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx); } } Ops.push_back(Val); } // Build the extracted vector elements back into a vector. StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, StoreTy, Ops); } } else { // We should always use a vector store when merging extracted vector // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller // constant inputs. bool IsLE = DAG.getDataLayout().isLittleEndian(); for (unsigned i = 0; i < NumStores; ++i) { unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); Val = peekThroughBitcasts(Val); StoreInt <<= ElementSizeBits; if (ConstantSDNode *C = dyn_cast(Val)) { StoreInt |= C->getAPIntValue() .zextOrTrunc(ElementSizeBits) .zextOrTrunc(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast(Val)) { StoreInt |= C->getValueAPF() .bitcastToAPInt() .zextOrTrunc(ElementSizeBits) .zextOrTrunc(SizeInBits); // If fp truncation is necessary give up for now. 
if (MemVT.getSizeInBits() != ElementSizeBits) return false; } else { llvm_unreachable("Invalid constant element type"); } } // Create the new Load and Store operations. StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores); // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; if (!UseTrunc) { NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstInChain->getAlign(), *Flags, AAInfo); } else { // Must be realized as a trunc store EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits(); ConstantSDNode *C = cast(StoredVal); SDValue ExtendedStoreVal = DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, LegalizedStoredValTy); NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags, AAInfo); } // Replace all merged stores with the new store. for (unsigned i = 0; i < NumStores; ++i) CombineTo(StoreNodes[i].MemNode, NewStore); AddToWorklist(NewChain.getNode()); return true; } void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl &StoreNodes, SDNode *&RootNode) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. We must have a base and an offset. Do not handle stores to undef // base pointers. BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef()) return; SDValue Val = peekThroughBitcasts(St->getValue()); StoreSource StoreSrc = getStoreSource(Val); assert(StoreSrc != StoreSource::Unknown && "Expected known source for store"); // Match on loadbaseptr if relevant. EVT MemVT = St->getMemoryVT(); BaseIndexOffset LBasePtr; EVT LoadVT; if (StoreSrc == StoreSource::Load) { auto *Ld = cast(Val); LBasePtr = BaseIndexOffset::match(Ld, DAG); LoadVT = Ld->getMemoryVT(); // Load and store should be the same type. if (MemVT != LoadVT) return; // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) return; // The memory operands must not be volatile/indexed/atomic. // TODO: May be able to relax for unordered atomics (see D66309) if (!Ld->isSimple() || Ld->isIndexed()) return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { // The memory operands must not be volatile/indexed/atomic. // TODO: May be able to relax for unordered atomics (see D66309) if (!Other->isSimple() || Other->isIndexed()) return false; // Don't mix temporal stores with non-temporal stores. if (St->isNonTemporal() != Other->isNonTemporal()) return false; SDValue OtherBC = peekThroughBitcasts(Other->getValue()); // Allow merging constants of different types as integers. bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT()) : Other->getMemoryVT() != MemVT; switch (StoreSrc) { case StoreSource::Load: { if (NoTypeMatch) return false; // The Load's Base Ptr must also match. auto *OtherLd = dyn_cast(OtherBC); if (!OtherLd) return false; BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG); if (LoadVT != OtherLd->getMemoryVT()) return false; // Loads must only have one use. if (!OtherLd->hasNUsesOfValue(1, 0)) return false; // The memory operands must not be volatile/indexed/atomic. 
// TODO: May be able to relax for unordered atomics (see D66309) if (!OtherLd->isSimple() || OtherLd->isIndexed()) return false; // Don't mix temporal loads with non-temporal loads. if (cast(Val)->isNonTemporal() != OtherLd->isNonTemporal()) return false; if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) return false; break; } case StoreSource::Constant: if (NoTypeMatch) return false; if (!isIntOrFPConstant(OtherBC)) return false; break; case StoreSource::Extract: // Do not merge truncated stores here. if (Other->isTruncatingStore()) return false; if (!MemVT.bitsEq(OtherBC.getValueType())) return false; if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT && OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; break; default: llvm_unreachable("Unhandled store source for merging"); } Ptr = BaseIndexOffset::match(Other, DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; // Check if the pair of StoreNode and the RootNode already bail out many // times which is over the limit in dependence check. auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, SDNode *RootNode) -> bool { auto RootCount = StoreRootCountMap.find(StoreNode); return RootCount != StoreRootCountMap.end() && RootCount->second.first == RootNode && RootCount->second.second > StoreMergeDependenceLimit; }; auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) { // This must be a chain use. if (UseIter.getOperandNo() != 0) return; if (auto *OtherStore = dyn_cast(*UseIter)) { BaseIndexOffset Ptr; int64_t PtrDiff; if (CandidateMatch(OtherStore, Ptr, PtrDiff) && !OverLimitInDependenceCheck(OtherStore, RootNode)) StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff)); } }; // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down // through all children. For instance we will find Store{1,2,3} if // St is Store1, Store2. or Store3 where the root is not a load // which always true for nonvolatile ops. TODO: Expand // the search to find all valid candidates through multiple layers of loads. // // Root // |-------|-------| // Load Load Store3 // | | // Store1 Store2 // // FIXME: We should be able to climb and // descend TokenFactors to find candidates as well. RootNode = St->getChain().getNode(); unsigned NumNodesExplored = 0; const unsigned MaxSearchNodes = 1024; if (auto *Ldn = dyn_cast(RootNode)) { RootNode = Ldn->getChain().getNode(); for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) { if (I.getOperandNo() == 0 && isa(*I)) { // walk down chain for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2) TryToAddCandidate(I2); } // Check stores that depend on the root (e.g. Store 3 in the chart above). if (I.getOperandNo() == 0 && isa(*I)) { TryToAddCandidate(I); } } } else { for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) TryToAddCandidate(I); } } // We need to check that merging these stores does not cause a loop in the // DAG. Any store candidate may depend on another candidate indirectly through // its operands. Check in parallel by searching up from operands of candidates. 
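// Illustration (hypothetical): if candidate store A stores the value
// (add (load %q), 1) and that load's chain operand is candidate store B, then
// A's value transitively depends on B. Merging A and B into one store would
// make the merged node a predecessor of its own value operand, i.e. a cycle.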
bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl &StoreNodes, unsigned NumStores, SDNode *RootNode) { // FIXME: We should be able to truncate a full search of // predecessors by doing a BFS and keeping tabs the originating // stores from which worklist nodes come from in a similar way to // TokenFactor simplfication. SmallPtrSet Visited; SmallVector Worklist; // RootNode is a predecessor to all candidates so we need not search // past it. Add RootNode (peeking through TokenFactors). Do not count // these towards size check. Worklist.push_back(RootNode); while (!Worklist.empty()) { auto N = Worklist.pop_back_val(); if (!Visited.insert(N).second) continue; // Already present in Visited. if (N->getOpcode() == ISD::TokenFactor) { for (SDValue Op : N->ops()) Worklist.push_back(Op.getNode()); } } // Don't count pruning nodes towards max. unsigned int Max = 1024 + Visited.size(); // Search Ops of store candidates. for (unsigned i = 0; i < NumStores; ++i) { SDNode *N = StoreNodes[i].MemNode; // Of the 4 Store Operands: // * Chain (Op 0) -> We have already considered these // in candidate selection, but only by following the // chain dependencies. We could still have a chain // dependency to a load, that has a non-chain dep to // another load, that depends on a store, etc. So it is // possible to have dependencies that consist of a mix // of chain and non-chain deps, and we need to include // chain operands in the analysis here.. // * Value (Op 1) -> Cycles may happen (e.g. through load chains) // * Address (Op 2) -> Merged addresses may only vary by a fixed constant, // but aren't necessarily fromt the same base node, so // cycles possible (e.g. via indexed store). // * (Op 3) -> Represents the pre or post-indexing offset (or undef for // non-indexed stores). Not constant on all targets (e.g. ARM) // and so can participate in a cycle. for (unsigned j = 0; j < N->getNumOperands(); ++j) Worklist.push_back(N->getOperand(j).getNode()); } // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, Max)) { // If the searching bail out, record the StoreNode and RootNode in the // StoreRootCountMap. If we have seen the pair many times over a limit, // we won't add the StoreNode into StoreNodes set again. if (Visited.size() >= Max) { auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode]; if (RootCount.first == RootNode) RootCount.second++; else RootCount = {RootNode, 1}; } return false; } return true; } unsigned DAGCombiner::getConsecutiveStores(SmallVectorImpl &StoreNodes, int64_t ElementSizeBytes) const { while (true) { // Find a store past the width of the first store. size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != StoreNodes[StartIdx + 1].OffsetFromBase) ++StartIdx; // Bail if we don't have enough candidates to merge. if (StartIdx + 1 >= StoreNodes.size()) return 0; // Trim stores that overlapped with the first store. if (StartIdx) StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); // Scan the memory operations on the chain and find the first // non-consecutive store memory address. unsigned NumConsecutiveStores = 1; int64_t StartAddress = StoreNodes[0].OffsetFromBase; // Check that the addresses are consecutive starting from the second // element in the list of stores. 
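    // Illustration (hypothetical offsets): with 4-byte elements and offsets
    // {0, 4, 8, 20} from the base, the loop below finds a consecutive run of
    // three stores; the store at offset 20 is not part of the run.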
for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { int64_t CurrAddress = StoreNodes[i].OffsetFromBase; if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; NumConsecutiveStores = i + 1; } if (NumConsecutiveStores > 1) return NumConsecutiveStores; // There are no consecutive stores at the start of the list. // Remove the first store and try again. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); } } bool DAGCombiner::tryStoreMergeOfConstants( SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode, bool AllowVectors) { LLVMContext &Context = *DAG.getContext(); const DataLayout &DL = DAG.getDataLayout(); int64_t ElementSizeBytes = MemVT.getStoreSize(); unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; bool MadeChange = false; // Store the constants into memory as one consecutive store. while (NumConsecutiveStores >= 2) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); Align FirstStoreAlign = FirstInChain->getAlign(); unsigned LastLegalType = 1; unsigned LastLegalVectorType = 1; bool LastIntegerTrunc = false; bool NonZero = false; unsigned FirstZeroAfterNonZero = NumConsecutiveStores; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *ST = cast(StoreNodes[i].MemNode); SDValue StoredVal = ST->getValue(); bool IsElementZero = false; if (ConstantSDNode *C = dyn_cast(StoredVal)) IsElementZero = C->isZero(); else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) IsElementZero = C->getConstantFPValue()->isNullValue(); if (IsElementZero) { if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) FirstZeroAfterNonZero = i; } NonZero |= !IsElementZero; // Find a legal type for the constant store. unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); bool IsFast = false; // Break early when size is too large to be legal. if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) break; if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFast) && IsFast) { LastIntegerTrunc = false; LastLegalType = i + 1; // Or check whether a truncstore is legal. } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFast) && IsFast) { LastIntegerTrunc = true; LastLegalType = i + 1; } } // We only use vectors if the constant is known to be zero or the // target allows it and the function is not marked with the // noimplicitfloat attribute. if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && AllowVectors) { // Find a legal type for the vector store. unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) LastLegalVectorType = i + 1; } } bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors; unsigned NumElem = (UseVector) ? 
LastLegalVectorType : LastLegalType; bool UseTrunc = LastIntegerTrunc && !UseVector; // Check if we found a legal integer type that creates a meaningful // merge. if (NumElem < 2) { // We know that candidate stores are in order and of correct // shape. While there is no mergeable sequence from the // beginning one may start later in the sequence. The only // reason a merge of size N could have failed where another of // the same size would not have, is if the alignment has // improved or we've dropped a non-zero value. Drop as many // candidates as we can here. unsigned NumSkip = 1; while ((NumSkip < NumConsecutiveStores) && (NumSkip < FirstZeroAfterNonZero) && (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); NumConsecutiveStores -= NumSkip; continue; } // Check that we can merge these candidates without causing a cycle. if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, RootNode)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); NumConsecutiveStores -= NumElem; continue; } MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, /*IsConstantSrc*/ true, UseVector, UseTrunc); // Remove merged stores for next iteration. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); NumConsecutiveStores -= NumElem; } return MadeChange; } bool DAGCombiner::tryStoreMergeOfExtracts( SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode) { LLVMContext &Context = *DAG.getContext(); const DataLayout &DL = DAG.getDataLayout(); unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; bool MadeChange = false; // Loop on Consecutive Stores on success. while (NumConsecutiveStores >= 2) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); Align FirstStoreAlign = FirstInChain->getAlign(); unsigned NumStoresToMerge = 1; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { // Find a legal type for the vector store. unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast = false; // Break early when size is too large to be legal. if (Ty.getSizeInBits() > MaximumLegalStoreInBits) break; if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) NumStoresToMerge = i + 1; } // Check if we found a legal integer type creating a meaningful // merge. if (NumStoresToMerge < 2) { // We know that candidate stores are in order and of correct // shape. While there is no mergeable sequence from the // beginning one may start later in the sequence. The only // reason a merge of size N could have failed where another of // the same size would not have, is if the alignment has // improved. Drop as many candidates as we can here. unsigned NumSkip = 1; while ((NumSkip < NumConsecutiveStores) && (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); NumConsecutiveStores -= NumSkip; continue; } // Check that we can merge these candidates without causing a cycle. 
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge, RootNode)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); NumConsecutiveStores -= NumStoresToMerge; continue; } MadeChange |= mergeStoresOfConstantsOrVecElts( StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false, /*UseVector*/ true, /*UseTrunc*/ false); StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); NumConsecutiveStores -= NumStoresToMerge; } return MadeChange; } bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode, bool AllowVectors, bool IsNonTemporalStore, bool IsNonTemporalLoad) { LLVMContext &Context = *DAG.getContext(); const DataLayout &DL = DAG.getDataLayout(); int64_t ElementSizeBytes = MemVT.getStoreSize(); unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; bool MadeChange = false; // Look for load nodes which are used by the stored values. SmallVector LoadNodes; // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. BaseIndexOffset LdBasePtr; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); SDValue Val = peekThroughBitcasts(St->getValue()); LoadSDNode *Ld = cast(Val); BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); // If this is not the first ptr that we check. int64_t LdOffset = 0; if (LdBasePtr.getBase().getNode()) { // The base ptr must be the same. if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) break; } else { // Check that all other base pointers are the same as this one. LdBasePtr = LdPtr; } // We found a potential memory operand to merge. LoadNodes.push_back(MemOpLink(Ld, LdOffset)); } while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { Align RequiredAlignment; bool NeedRotate = false; if (LoadNodes.size() == 2) { // If we have load/store pair instructions and we only have two values, // don't bother merging. if (TLI.hasPairedLoad(MemVT, RequiredAlignment) && StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); break; } // If the loads are reversed, see if we can rotate the halves into place. int64_t Offset0 = LoadNodes[0].OffsetFromBase; int64_t Offset1 = LoadNodes[1].OffsetFromBase; EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2); if (Offset0 - Offset1 == ElementSizeBytes && (hasOperation(ISD::ROTL, PairVT) || hasOperation(ISD::ROTR, PairVT))) { std::swap(LoadNodes[0], LoadNodes[1]); NeedRotate = true; } } LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); Align FirstStoreAlign = FirstInChain->getAlign(); LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); // Scan the memory operations on the chain and find the first // non-consecutive load memory address. These variables hold the index in // the store node array. unsigned LastConsecutiveLoad = 1; // This variable refers to the size and not index in the array. unsigned LastLegalVectorType = 1; unsigned LastLegalIntegerType = 1; bool isDereferenceable = true; bool DoIntegerTruncate = false; int64_t StartAddress = LoadNodes[0].OffsetFromBase; SDValue LoadChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { // All loads must share the same chain. 
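      // (Illustration of why this matters: the merged load created later
      // reuses the first load's chain, so a load reached through a different
      // chain could be separated from the others by an intervening store or
      // call whose ordering would otherwise be lost.)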
if (LoadNodes[i].MemNode->getChain() != LoadChain) break; int64_t CurrAddress = LoadNodes[i].OffsetFromBase; if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; LastConsecutiveLoad = i; if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) isDereferenceable = false; // Find a legal type for the vector store. unsigned Elts = (i + 1) * NumMemElts; EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); // Break early when size is too large to be legal. if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) break; bool IsFastSt = false; bool IsFastLd = false; // Don't try vector types if we need a rotate. We may still fail the // legality checks for the integer type, but we can't handle the rotate // case with vectors. // FIXME: We could use a shuffle in place of the rotate. if (!NeedRotate && TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } // Find a legal type for the integer store. unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; DoIntegerTruncate = false; // Or check whether a truncstore and extload is legal. } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG.getMachineFunction()) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; DoIntegerTruncate = true; } } } // Only use vector types if the vector type is larger than the integer // type. If they are the same, use integers. bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && AllowVectors; unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); // We add +1 here because the LastXXX variables refer to location while // the NumElem refers to array/index size. unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); NumElem = std::min(LastLegalType, NumElem); Align FirstLoadAlign = FirstLoad->getAlign(); if (NumElem < 2) { // We know that candidate stores are in order and of correct // shape. While there is no mergeable sequence from the // beginning one may start later in the sequence. The only // reason a merge of size N could have failed where another of // the same size would not have is if the alignment or either // the load or store has improved. Drop as many candidates as we // can here. 
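      // Illustration (hypothetical): if the first store is only 2-byte
      // aligned but a later candidate is 8-byte aligned, restarting the
      // search at the better-aligned candidate may let a wider merged access
      // satisfy the allowsMemoryAccess / fast-access checks above.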
unsigned NumSkip = 1; while ((NumSkip < LoadNodes.size()) && (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) && (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); NumConsecutiveStores -= NumSkip; continue; } // Check that we can merge these candidates without causing a cycle. if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, RootNode)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); NumConsecutiveStores -= NumElem; continue; } // Find if it is better to use vectors or integers to load and store // to memory. EVT JointMemOpVT; if (UseVectorTy) { // Find a legal type for the vector store. unsigned Elts = NumElem * NumMemElts; JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); } else { unsigned SizeInBits = NumElem * ElementSizeBytes * 8; JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); // The merged loads are required to have the same incoming chain, so // using the first's chain is acceptable. SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); AddToWorklist(NewStoreChain.getNode()); MachineMemOperand::Flags LdMMOFlags = isDereferenceable ? MachineMemOperand::MODereferenceable : MachineMemOperand::MONone; if (IsNonTemporalLoad) LdMMOFlags |= MachineMemOperand::MONonTemporal; MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore ? MachineMemOperand::MONonTemporal : MachineMemOperand::MONone; SDValue NewLoad, NewStore; if (UseVectorTy || !DoIntegerTruncate) { NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags); SDValue StoreOp = NewLoad; if (NeedRotate) { unsigned LoadWidth = ElementSizeBytes * 8 * 2; assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) && "Unexpected type for rotate-able load pair"); SDValue RotAmt = DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL); // Target can convert to the identical ROTR if it does not have ROTL. StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt); } NewStore = DAG.getStore( NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); } else { // This must be the truncstore/extload case EVT ExtendedTy = TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), JointMemOpVT, FirstLoadAlign, LdMMOFlags); NewStore = DAG.getTruncStore( NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), JointMemOpVT, FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); } // Transfer chain users from old loads to the new load. for (unsigned i = 0; i < NumElem; ++i) { LoadSDNode *Ld = cast(LoadNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); } // Replace all stores with the new store. Recursively remove corresponding // values if they are no longer used. 
    for (unsigned i = 0; i < NumElem; ++i) {
      SDValue Val = StoreNodes[i].MemNode->getOperand(1);
      CombineTo(StoreNodes[i].MemNode, NewStore);
      if (Val->use_empty())
        recursivelyDeleteUnusedNodes(Val.getNode());
    }

    MadeChange = true;
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
    LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
    NumConsecutiveStores -= NumElem;
  }
  return MadeChange;
}

bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
    return false;

  // TODO: Extend this function to merge stores of scalable vectors.
  // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
  // store since we know <vscale x 16 x i8> is exactly twice as large as
  // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
  EVT MemVT = St->getMemoryVT();
  if (MemVT.isScalableVector())
    return false;
  if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
    return false;

  // This function cannot currently deal with non-byte-sized memory sizes.
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
    return false;

  // Do not bother looking at stored values that are not constants, loads, or
  // extracted vector elements.
  SDValue StoredVal = peekThroughBitcasts(St->getValue());
  const StoreSource StoreSrc = getStoreSource(StoredVal);
  if (StoreSrc == StoreSource::Unknown)
    return false;

  SmallVector<MemOpLink, 8> StoreNodes;
  SDNode *RootNode;
  // Find potential store merge candidates by searching through the chain
  // sub-DAG.
  getStoreMergeCandidates(St, StoreNodes, RootNode);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.
  llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase;
  });

  bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
      Attribute::NoImplicitFloat);
  bool IsNonTemporalStore = St->isNonTemporal();
  bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
                           cast<LoadSDNode>(StoredVal)->isNonTemporal();

  // Store Merge attempts to merge the lowest stores. This generally works out
  // because, if successful, the remaining stores are checked after the first
  // collection of stores is merged. However, in the case that a non-mergeable
  // store is found first, e.g., {p[-2], p[0], p[1], p[2], p[3]}, we would fail
  // and miss the subsequent mergeable cases. To prevent this, we prune such
  // stores from the front of StoreNodes here.
  bool MadeChange = false;
  while (StoreNodes.size() > 1) {
    unsigned NumConsecutiveStores =
        getConsecutiveStores(StoreNodes, ElementSizeBytes);
    // There are no more stores in the list to examine.
    if (NumConsecutiveStores == 0)
      return MadeChange;

    // We have at least 2 consecutive stores. Try to merge them.
assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores"); switch (StoreSrc) { case StoreSource::Constant: MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores, MemVT, RootNode, AllowVectors); break; case StoreSource::Extract: MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores, MemVT, RootNode); break; case StoreSource::Load: MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores, MemVT, RootNode, AllowVectors, IsNonTemporalStore, IsNonTemporalLoad); break; default: llvm_unreachable("Unhandled store source type"); } } return MadeChange; } SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { SDLoc SL(ST); SDValue ReplStore; // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), ST->getMemoryVT(), ST->getMemOperand()); } else { ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), ST->getMemOperand()); } // Create token to keep both nodes around. SDValue Token = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, ST->getChain(), ReplStore); // Make sure the new and old chains are cleaned up. AddToWorklist(Token.getNode()); // Don't add users to work list. return CombineTo(ST, Token, false); } SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { SDValue Value = ST->getValue(); if (Value.getOpcode() == ISD::TargetConstantFP) return SDValue(); if (!ISD::isNormalStore(ST)) return SDValue(); SDLoc DL(ST); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); const ConstantFPSDNode *CFP = cast(Value); // NOTE: If the original store is volatile, this transform must not increase // the number of stores. For example, on x86-32 an f64 can be stored in one // processor operation but an i64 (which is not legal) requires two. So the // transform should not be done in this case. SDValue Tmp; switch (CFP->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. case MVT::bf16: case MVT::f80: case MVT::f128: case MVT::ppcf128: return SDValue(); case MVT::f32: if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), SDLoc(CFP), MVT::i32); return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); } return SDValue(); case MVT::f64: if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). getZExtValue(), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); } if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. 
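      // Illustrative example (hypothetical): storing the f64 constant 1.0
      // (bit pattern 0x3FF0000000000000) becomes Lo = 0x00000000 and
      // Hi = 0x3FF00000 below; on a little-endian target Lo is stored at Ptr
      // and Hi at Ptr + 4, and the two halves are swapped for big-endian.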
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getOriginalAlign(), MMOFlags, AAInfo); Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL); SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), ST->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } return SDValue(); } } SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast(N); SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); // If this is a store of a bit convert, store the input value if the // resultant store does not need a higher alignment than the original. if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && ST->isUnindexed()) { EVT SVT = Value.getOperand(0).getValueType(); // If the store is volatile, we only want to change the store type if the // resulting store is legal. Otherwise we might increase the number of // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. // TODO: May be able to relax for unordered atomics (see D66309) if (((!LegalOperations && ST->isSimple()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getMemOperand()); } } // Turn 'store undef, Ptr' -> nothing. if (Value.isUndef() && ST->isUnindexed()) return Chain; // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) { if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { if (*Alignment > ST->getAlign() && isAligned(*Alignment, ST->getSrcValueOffset())) { SDValue NewStore = DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), *Alignment, ST->getMemOperand()->getFlags(), ST->getAAInfo()); // NewStore will always be N as we are only refining the alignment assert(NewStore.getNode() == N); (void)NewStore; } } } // Try transforming a pair floating point load / store ops to integer // load / store ops. if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; // Try transforming several stores into STORE (BSWAP). if (SDValue Store = mergeTruncStores(ST)) return Store; if (ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes, on this store and any // adjacent stores. if (findBetterNeighborChains(ST)) { // replaceStoreChain uses CombineTo, which handled all of the worklist // manipulation. Return the original node to not do anything else. return SDValue(ST, 0); } Chain = ST->getChain(); } // FIXME: is there such a thing as a truncating indexed store? if (ST->isTruncatingStore() && ST->isUnindexed() && Value.getValueType().isInteger() && (!isa(Value) || !cast(Value)->isOpaque())) { // Convert a truncating store of a extension into a standard store. 
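    // Illustrative example (hypothetical types): a truncating i16 store of
    // (zext i16 %x to i32) becomes a plain i16 store of %x, provided i16
    // stores are legal or custom on the target.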
if ((Value.getOpcode() == ISD::ZERO_EXTEND || Value.getOpcode() == ISD::SIGN_EXTEND || Value.getOpcode() == ISD::ANY_EXTEND) && Value.getOperand(0).getValueType() == ST->getMemoryVT() && TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT())) return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getMemOperand()); APInt TruncDemandedBits = APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits()); // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" AddToWorklist(Value.getNode()); if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, TruncDemandedBits)) { // Re-visit the store if anything changed and the store hasn't been merged // with another node (N is deleted) SimplifyDemandedBits will add Value's // node back to the worklist if necessary, but we also need to re-visit // the Store node itself. if (N->getOpcode() != ISD::DELETED_NODE) AddToWorklist(N); return SDValue(N, 0); } } // If this is a load followed by a store to the same location, then the store // is dead/noop. // TODO: Can relax for unordered atomics (see D66309) if (LoadSDNode *Ld = dyn_cast(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && ST->isUnindexed() && ST->isSimple() && Ld->getAddressSpace() == ST->getAddressSpace() && // There can't be any side effects between the load and store, such as // a call or store. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { // The store is dead, remove it. return Chain; } } // TODO: Can relax for unordered atomics (see D66309) if (StoreSDNode *ST1 = dyn_cast(Chain)) { if (ST->isUnindexed() && ST->isSimple() && ST1->isUnindexed() && ST1->isSimple()) { if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr && ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() && ST->getAddressSpace() == ST1->getAddressSpace()) { // If this is a store followed by a store with the same value to the // same location, then the store is dead/noop. return Chain; } if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() && !ST1->getBasePtr().isUndef() && // BaseIndexOffset and the code below requires knowing the size // of a vector, so bail out if MemoryVT is scalable. !ST->getMemoryVT().isScalableVector() && !ST1->getMemoryVT().isScalableVector() && ST->getAddressSpace() == ST1->getAddressSpace()) { const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG); unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits(); unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits(); // If this is a store who's preceding store to a subset of the current // location and no one other node is chained to that store we can // effectively drop the store. Do not remove stores to undef as they may // be used as data sinks. if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) { CombineTo(ST1, ST1->getChain()); return SDValue(); } } } } // If this is an FP_ROUND or TRUNC followed by a store, fold this into a // truncating store. We can do this even if this is already a truncstore. 
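  // Illustrative example (hypothetical types): 'store (truncate %x:i64 to
  // i32), %p' becomes a truncating i32 store of the original i64 value %x,
  // assuming the target reports the combination as allowed via
  // canCombineTruncStore.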
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() && ST->isUnindexed() && TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), ST->getMemoryVT(), LegalOperations)) { return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getMemoryVT(), ST->getMemOperand()); } // Always perform this optimization before types are legal. If the target // prefers, also try this after legalization to catch stores that were created // by intrinsics or other nodes. if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) { while (true) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. bool Changed = mergeConsecutiveStores(ST); if (!Changed) break; // Return N as merge only uses CombineTo and no worklist clean // up is necessary. if (N->getOpcode() == ISD::DELETED_NODE || !isa(N)) return SDValue(N, 0); } } // Try transforming N to an indexed store. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // // Make sure to do this only after attempting to merge stores in order to // avoid changing the types of some subset of stores due to visit order, // preventing their merging. if (isa(ST->getValue())) { if (SDValue NewSt = replaceStoreOfFPConstant(ST)) return NewSt; } if (SDValue NewSt = splitMergedValStore(ST)) return NewSt; return ReduceLoadOpStoreWidth(N); } SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { const auto *LifetimeEnd = cast(N); if (!LifetimeEnd->hasOffset()) return SDValue(); const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), LifetimeEnd->getOffset(), false); // We walk up the chains to find stores. SmallVector Chains = {N->getOperand(0)}; while (!Chains.empty()) { SDValue Chain = Chains.pop_back_val(); if (!Chain.hasOneUse()) continue; switch (Chain.getOpcode()) { case ISD::TokenFactor: for (unsigned Nops = Chain.getNumOperands(); Nops;) Chains.push_back(Chain.getOperand(--Nops)); break; case ISD::LIFETIME_START: case ISD::LIFETIME_END: // We can forward past any lifetime start/end that can be proven not to // alias the node. if (!mayAlias(Chain.getNode(), N)) Chains.push_back(Chain.getOperand(0)); break; case ISD::STORE: { StoreSDNode *ST = dyn_cast(Chain); // TODO: Can relax for unordered atomics (see D66309) if (!ST->isSimple() || ST->isIndexed()) continue; const TypeSize StoreSize = ST->getMemoryVT().getStoreSize(); // The bounds of a scalable store are not known until runtime, so this // store cannot be elided. if (StoreSize.isScalable()) continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, // we can remove the store. if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase, StoreSize.getFixedSize() * 8)) { LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase.dump(); dbgs() << "\n"); CombineTo(ST, ST->getChain()); return SDValue(N, 0); } } } } return SDValue(); } /// For the instruction sequence of store below, F and I values /// are bundled together as an i64 value before being stored into memory. /// Sometimes it is more efficent to generate separate stores for F and I, /// which can remove the bitwise instructions or sink them to colder places. 
/// /// (store (or (zext (bitcast F to i32) to i64), /// (shl (zext I to i64), 32)), addr) --> /// (store F, addr) and (store I, addr+4) /// /// Similarly, splitting for other merged store can also be beneficial, like: /// For pair of {i32, i32}, i64 store --> two i32 stores. /// For pair of {i32, i16}, i64 store --> two i32 stores. /// For pair of {i16, i16}, i32 store --> two i16 stores. /// For pair of {i16, i8}, i32 store --> two i16 stores. /// For pair of {i8, i8}, i16 store --> two i8 stores. /// /// We allow each target to determine specifically which kind of splitting is /// supported. /// /// The store patterns are commonly seen from the simple code snippet below /// if only std::make_pair(...) is sroa transformed before inlined into hoo. /// void goo(const std::pair &); /// hoo() { /// ... /// goo(std::make_pair(tmp, ftmp)); /// ... /// } /// SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); // Can't change the number of memory accesses for a volatile store or break // atomicity for an atomic one. if (!ST->isSimple()) return SDValue(); SDValue Val = ST->getValue(); SDLoc DL(ST); // Match OR operand. if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR) return SDValue(); // Match SHL operand and get Lower and Higher parts of Val. SDValue Op1 = Val.getOperand(0); SDValue Op2 = Val.getOperand(1); SDValue Lo, Hi; if (Op1.getOpcode() != ISD::SHL) { std::swap(Op1, Op2); if (Op1.getOpcode() != ISD::SHL) return SDValue(); } Lo = Op2; Hi = Op1.getOperand(0); if (!Op1.hasOneUse()) return SDValue(); // Match shift amount to HalfValBitSize. unsigned HalfValBitSize = Val.getValueSizeInBits() / 2; ConstantSDNode *ShAmt = dyn_cast(Op1.getOperand(1)); if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize) return SDValue(); // Lo and Hi are zero-extended from int with size less equal than 32 // to i64. if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() || !Lo.getOperand(0).getValueType().isScalarInteger() || Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize || Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() || !Hi.getOperand(0).getValueType().isScalarInteger() || Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) return SDValue(); // Use the EVT of low and high parts before bitcast as the input // of target query. EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) ? Lo.getOperand(0).getValueType() : Lo.getValueType(); EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) ? Hi.getOperand(0).getValueType() : Hi.getValueType(); if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) return SDValue(); // Start to split store. MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); // Change the sizes of Lo and Hi's value types to HalfValBitSize. EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0)); Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0)); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); // Lower value store. SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getOriginalAlign(), MMOFlags, AAInfo); Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL); // Higher value store. 
SDValue St1 = DAG.getStore( St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), ST->getOriginalAlign(), MMOFlags, AAInfo); return St1; } /// Convert a disguised subvector insertion into a shuffle: SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Expected extract_vector_elt"); SDValue InsertVal = N->getOperand(1); SDValue Vec = N->getOperand(0); // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), // InsIndex) // --> (vector_shuffle X, Y) and variations where shuffle operands may be // CONCAT_VECTORS. if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isa(InsertVal.getOperand(1))) { ShuffleVectorSDNode *SVN = cast(Vec.getNode()); ArrayRef Mask = SVN->getMask(); SDValue X = Vec.getOperand(0); SDValue Y = Vec.getOperand(1); // Vec's operand 0 is using indices from 0 to N-1 and // operand 1 from N to 2N - 1, where N is the number of // elements in the vectors. SDValue InsertVal0 = InsertVal.getOperand(0); int ElementOffset = -1; // We explore the inputs of the shuffle in order to see if we find the // source of the extract_vector_elt. If so, we can use it to modify the // shuffle rather than perform an insert_vector_elt. SmallVector, 8> ArgWorkList; ArgWorkList.emplace_back(Mask.size(), Y); ArgWorkList.emplace_back(0, X); while (!ArgWorkList.empty()) { int ArgOffset; SDValue ArgVal; std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); if (ArgVal == InsertVal0) { ElementOffset = ArgOffset; break; } // Peek through concat_vector. if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { int CurrentArgOffset = ArgOffset + ArgVal.getValueType().getVectorNumElements(); int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); for (SDValue Op : reverse(ArgVal->ops())) { CurrentArgOffset -= Step; ArgWorkList.emplace_back(CurrentArgOffset, Op); } // Make sure we went through all the elements and did not screw up index // computation. assert(CurrentArgOffset == ArgOffset); } } // If we failed to find a match, see if we can replace an UNDEF shuffle // operand. if (ElementOffset == -1 && Y.isUndef() && InsertVal0.getValueType() == Y.getValueType()) { ElementOffset = Mask.size(); Y = InsertVal0; } if (ElementOffset != -1) { SmallVector NewMask(Mask.begin(), Mask.end()); auto *ExtrIndex = cast(InsertVal.getOperand(1)); NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue(); assert(NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements()) && NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); SDValue LegalShuffle = TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG); if (LegalShuffle) return LegalShuffle; } } // insert_vector_elt V, (bitcast X from vector type), IdxC --> // bitcast(shuffle (bitcast V), (extended X), Mask) // Note: We do not use an insert_subvector node because that requires a // legal subvector type. if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); SDValue SubVec = InsertVal.getOperand(0); SDValue DestVec = N->getOperand(0); EVT SubVecVT = SubVec.getValueType(); EVT VT = DestVec.getValueType(); unsigned NumSrcElts = SubVecVT.getVectorNumElements(); // If the source only has a single vector element, the cost of creating adding // it to a vector is likely to exceed the cost of a insert_vector_elt. 
if (NumSrcElts == 1) return SDValue(); unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); unsigned NumMaskVals = ExtendRatio * NumSrcElts; // Step 1: Create a shuffle mask that implements this insert operation. The // vector that we are inserting into will be operand 0 of the shuffle, so // those elements are just 'i'. The inserted subvector is in the first // positions of operand 1 of the shuffle. Example: // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7} SmallVector Mask(NumMaskVals); for (unsigned i = 0; i != NumMaskVals; ++i) { if (i / NumSrcElts == InsIndex) Mask[i] = (i % NumSrcElts) + NumMaskVals; else Mask[i] = i; } // Bail out if the target can not handle the shuffle we want to create. EVT SubVecEltVT = SubVecVT.getVectorElementType(); EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals); if (!TLI.isShuffleMaskLegal(Mask, ShufVT)) return SDValue(); // Step 2: Create a wide vector from the inserted source vector by appending // undefined elements. This is the same size as our destination vector. SDLoc DL(N); SmallVector ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT)); ConcatOps[0] = SubVec; SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps); // Step 3: Shuffle in the padded subvector. SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec); SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask); AddToWorklist(PaddedSubV.getNode()); AddToWorklist(DestVecBC.getNode()); AddToWorklist(Shuf.getNode()); return DAG.getBitcast(VT, Shuf); } SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); SDLoc DL(N); EVT VT = InVec.getValueType(); auto *IndexC = dyn_cast(EltNo); // Insert into out-of-bounds element is undefined. if (IndexC && VT.isFixedLengthVector() && IndexC->getZExtValue() >= VT.getVectorNumElements()) return DAG.getUNDEF(VT); // Remove redundant insertions: // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) return InVec; if (!IndexC) { // If this is variable insert to undef vector, it might be better to splat: // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... > if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { if (VT.isScalableVector()) return DAG.getSplatVector(VT, DL, InVal); SmallVector Ops(VT.getVectorNumElements(), InVal); return DAG.getBuildVector(VT, DL, Ops); } return SDValue(); } if (VT.isScalableVector()) return SDValue(); unsigned NumElts = VT.getVectorNumElements(); // We must know which element is being inserted for folds below here. unsigned Elt = IndexC->getZExtValue(); if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) return Shuf; // Handle <1 x ???> vector insertion special cases. if (NumElts == 1) { // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && InVal.getOperand(0).getValueType() == VT && isNullConstant(InVal.getOperand(1))) return InVal.getOperand(0); } // Canonicalize insert_vector_elt dag nodes. // Example: // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) // // Do this only if the child insert_vector node has one use; also // do this only if indices are both constants and Idx1 < Idx0. 
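// A minimal standalone sketch (array model and helper name are hypothetical,
// not taken from this file) of why the swap below is safe: two insertions at
// distinct constant indices commute, so the chain can be reordered to put the
// smaller index innermost without changing the resulting vector.
#include <array>

static std::array<int, 4> insertTwice(std::array<int, 4> Vec,
                                      unsigned InnerIdx, int InnerVal,
                                      unsigned OuterIdx, int OuterVal) {
  Vec[InnerIdx] = InnerVal; // inner insert_vector_elt happens first
  Vec[OuterIdx] = OuterVal; // outer insert_vector_elt happens second
  return Vec;
}
// For InnerIdx != OuterIdx, swapping the two insertions yields the same
// vector; with equal indices the later insert would win, which is why the
// combine requires two distinct constant indices.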
if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() && isa(InVec.getOperand(2))) { unsigned OtherElt = InVec.getConstantOperandVal(2); if (Elt < OtherElt) { // Swap nodes. SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InVec.getOperand(0), InVal, EltNo); AddToWorklist(NewOp.getNode()); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); } } // Attempt to convert an insert_vector_elt chain into a legal build_vector. if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { // vXi1 vector - we don't need to recurse. if (NumElts == 1) return DAG.getBuildVector(VT, DL, {InVal}); // If we haven't already collected the element, insert into the op list. EVT MaxEltVT = InVal.getValueType(); auto AddBuildVectorOp = [&](SmallVectorImpl &Ops, SDValue Elt, unsigned Idx) { if (!Ops[Idx]) { Ops[Idx] = Elt; if (VT.isInteger()) { EVT EltVT = Elt.getValueType(); MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT; } } }; // Ensure all the operands are the same value type, fill any missing // operands with UNDEF and create the BUILD_VECTOR. auto CanonicalizeBuildVector = [&](SmallVectorImpl &Ops) { assert(Ops.size() == NumElts && "Unexpected vector size"); for (SDValue &Op : Ops) { if (Op) Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op; else Op = DAG.getUNDEF(MaxEltVT); } return DAG.getBuildVector(VT, DL, Ops); }; SmallVector Ops(NumElts, SDValue()); Ops[Elt] = InVal; // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR. for (SDValue CurVec = InVec; CurVec;) { // UNDEF - build new BUILD_VECTOR from already inserted operands. if (CurVec.isUndef()) return CanonicalizeBuildVector(Ops); // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR. if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) { for (unsigned I = 0; I != NumElts; ++I) AddBuildVectorOp(Ops, CurVec.getOperand(I), I); return CanonicalizeBuildVector(Ops); } // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR. if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) { AddBuildVectorOp(Ops, CurVec.getOperand(0), 0); return CanonicalizeBuildVector(Ops); } // INSERT_VECTOR_ELT - insert operand and continue up the chain. if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse()) if (auto *CurIdx = dyn_cast(CurVec.getOperand(2))) if (CurIdx->getAPIntValue().ult(NumElts)) { unsigned Idx = CurIdx->getZExtValue(); AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx); // Found entire BUILD_VECTOR. if (all_of(Ops, [](SDValue Op) { return !!Op; })) return CanonicalizeBuildVector(Ops); CurVec = CurVec->getOperand(0); continue; } // Failed to find a match in the chain - bail. break; } } return SDValue(); } SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { assert(OriginalLoad->isSimple()); EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); // If the vector element type is not a multiple of a byte then we are unable // to correctly compute an address to load only the extracted element as a // scalar. if (!VecEltVT.isByteSized()) return SDValue(); ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? 
ISD::NON_EXTLOAD : ISD::EXTLOAD; if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) || !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) return SDValue(); Align Alignment = OriginalLoad->getAlign(); MachinePointerInfo MPI; SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); Alignment = commonAlignment(Alignment, PtrOff); } else { // Discard the pointer info except the address space because the memory // operand can't represent this new access since the offset is variable. MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8); } bool IsFast = false; if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, OriginalLoad->getAddressSpace(), Alignment, OriginalLoad->getMemOperand()->getFlags(), &IsFast) || !IsFast) return SDValue(); SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo); // We are replacing a vector load with a scalar load. The new load must have // identical memory op ordering to the original. SDValue Load; if (ResultVT.bitsGT(VecEltVT)) { // If the result type of vextract is wider than the load, then issue an // extending load instead. ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD; Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, Alignment, OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load); } else { // The result type is narrower or the same width as the vector element Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI, Alignment, OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load); if (ResultVT.bitsLT(VecEltVT)) Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load); else Load = DAG.getBitcast(ResultVT, Load); } ++OpsNarrowed; return Load; } /// Transform a vector binary operation into a scalar binary operation by moving /// the math/logic after an extract element of a vector. static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Vec = ExtElt->getOperand(0); SDValue Index = ExtElt->getOperand(1); auto *IndexC = dyn_cast(Index); if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() || Vec->getNumValues() != 1) return SDValue(); // Targets may want to avoid this to prevent an expensive register transfer. if (!TLI.shouldScalarizeBinop(Vec)) return SDValue(); // Extracting an element of a vector constant is constant-folded, so this // transform is just replacing a vector op with a scalar op while moving the // extract. 
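// A minimal standalone sketch (vector-as-array model and helper name are
// hypothetical, not taken from this file) of the rewrite performed below:
// extracting lane I from a lanewise binop gives the same value as applying
// the scalar binop to the extracted lanes, e.g.
// extractelt (add X, C), I --> add (extractelt X, I), C[I].
#include <vector>

static bool extractOfBinopEqualsBinopOfExtracts(const std::vector<int> &X,
                                                const std::vector<int> &C,
                                                unsigned I) {
  // Vector form: lanewise add, then extract lane I.
  std::vector<int> Sum(X.size());
  for (unsigned L = 0; L != X.size(); ++L)
    Sum[L] = X[L] + C[L];
  // Scalar form after the combine: extract both lanes, then add.
  return Sum[I] == X[I] + C[I];
}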
SDValue Op0 = Vec.getOperand(0); SDValue Op1 = Vec.getOperand(1); APInt SplatVal; if (isAnyConstantBuildVector(Op0, true) || ISD::isConstantSplatVector(Op0.getNode(), SplatVal) || isAnyConstantBuildVector(Op1, true) || ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) { // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C' // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC) SDLoc DL(ExtElt); EVT VT = ExtElt->getValueType(0); SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index); SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index); return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1); } return SDValue(); } SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue VecOp = N->getOperand(0); SDValue Index = N->getOperand(1); EVT ScalarVT = N->getValueType(0); EVT VecVT = VecOp.getValueType(); if (VecOp.isUndef()) return DAG.getUNDEF(ScalarVT); // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val // // This only really matters if the index is non-constant since other combines // on the constant elements already work. SDLoc DL(N); if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT && Index == VecOp.getOperand(2)) { SDValue Elt = VecOp.getOperand(1); return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt; } // (vextract (scalar_to_vector val, 0) -> val if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Only 0'th element of SCALAR_TO_VECTOR is defined. if (DAG.isKnownNeverZero(Index)) return DAG.getUNDEF(ScalarVT); // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = VecOp.getOperand(0); if (InOp.getValueType() != ScalarVT) { assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() && InOp.getValueType().bitsGT(ScalarVT)); return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp); } return InOp; } // extract_vector_elt of out-of-bounds element -> UNDEF auto *IndexC = dyn_cast(Index); if (IndexC && VecVT.isFixedLengthVector() && IndexC->getAPIntValue().uge(VecVT.getVectorNumElements())) return DAG.getUNDEF(ScalarVT); // extract_vector_elt (build_vector x, y), 1 -> y if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || VecOp.getOpcode() == ISD::SPLAT_VECTOR) && TLI.isTypeLegal(VecVT) && (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) { assert((VecOp.getOpcode() != ISD::BUILD_VECTOR || VecVT.isFixedLengthVector()) && "BUILD_VECTOR used for scalable vectors"); unsigned IndexVal = VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0; SDValue Elt = VecOp.getOperand(IndexVal); EVT InEltVT = Elt.getValueType(); // Sometimes build_vector's scalar input types do not match result type. if (ScalarVT == InEltVT) return Elt; // TODO: It may be useful to truncate if free if the build_vector implicitly // converts. } if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations)) return BO; if (VecVT.isScalableVector()) return SDValue(); // All the code from this point onwards assumes fixed width vectors, but it's // possible that some of the combinations could be made to work for scalable // vectors too. unsigned NumElts = VecVT.getVectorNumElements(); unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); // TODO: These transforms should not require the 'hasOneUse' restriction, but // there are regressions on multiple targets without it. 
We can end up with a // mess of scalar and vector code if we reduce only part of the DAG to scalar. if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() && VecOp.hasOneUse()) { // The vector index of the LSBs of the source depend on the endian-ness. bool IsLE = DAG.getDataLayout().isLittleEndian(); unsigned ExtractIndex = IndexC->getZExtValue(); // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x) unsigned BCTruncElt = IsLE ? 0 : NumElts - 1; SDValue BCSrc = VecOp.getOperand(0); if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc); if (LegalTypes && BCSrc.getValueType().isInteger() && BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt --> // trunc i64 X to i32 SDValue X = BCSrc.getOperand(0); assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() && "Extract element and scalar to vector can't change element type " "from FP to integer."); unsigned XBitWidth = X.getValueSizeInBits(); BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1; // An extract element return value type can be wider than its vector // operand element type. In that case, the high bits are undefined, so // it's possible that we may need to extend rather than truncate. if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) { assert(XBitWidth % VecEltBitWidth == 0 && "Scalar bitwidth must be a multiple of vector element bitwidth"); return DAG.getAnyExtOrTrunc(X, DL, ScalarVT); } } } // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD // patterns. For example on AVX, extracting elements from a wide vector // without using extract_subvector. However, if we can find an underlying // scalar value, then we can always use that. if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) { auto *Shuf = cast(VecOp); // Find the new index to extract from. int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue()); // Extracting an undef index is undef. if (OrigElt == -1) return DAG.getUNDEF(ScalarVT); // Select the right vector half to extract from. SDValue SVInVec; if (OrigElt < (int)NumElts) { SVInVec = VecOp.getOperand(0); } else { SVInVec = VecOp.getOperand(1); OrigElt -= NumElts; } if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { SDValue InOp = SVInVec.getOperand(OrigElt); if (InOp.getValueType() != ScalarVT) { assert(InOp.getValueType().isInteger() && ScalarVT.isInteger()); InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT); } return InOp; } // FIXME: We should handle recursing on other vector shuffles and // scalar_to_vector here as well. if (!LegalOperations || // FIXME: Should really be just isOperationLegalOrCustom. TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) || TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) { return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec, DAG.getVectorIdxConstant(OrigElt, DL)); } } // If only EXTRACT_VECTOR_ELT nodes use the source vector we can // simplify it based on the (valid) extraction indices. 
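// A minimal standalone sketch (plain uint64_t stands in for APInt; the helper
// name is hypothetical, not taken from this file) of the demanded-elements
// set built below: when every user extracts at a known constant lane, only
// those lanes matter and everything else can be simplified away.
#include <cstdint>
#include <vector>

static uint64_t collectDemandedElts(const std::vector<unsigned> &ExtractIdxs,
                                    unsigned NumElts) {
  uint64_t Demanded = 0; // sketch assumes NumElts <= 64
  for (unsigned Idx : ExtractIdxs)
    if (Idx < NumElts) // out-of-range extracts demand no lanes
      Demanded |= (uint64_t)1 << Idx;
  return Demanded;
}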
if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) { return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && Use->getOperand(0) == VecOp && isa(Use->getOperand(1)); })) { APInt DemandedElts = APInt::getZero(NumElts); for (SDNode *Use : VecOp->uses()) { auto *CstElt = cast(Use->getOperand(1)); if (CstElt->getAPIntValue().ult(NumElts)) DemandedElts.setBit(CstElt->getZExtValue()); } if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) { // We simplified the vector operand of this extract element. If this // extract is not dead, visit it again so it is folded properly. if (N->getOpcode() != ISD::DELETED_NODE) AddToWorklist(N); return SDValue(N, 0); } APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth); if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) { // We simplified the vector operand of this extract element. If this // extract is not dead, visit it again so it is folded properly. if (N->getOpcode() != ISD::DELETED_NODE) AddToWorklist(N); return SDValue(N, 0); } } // Everything under here is trying to match an extract of a loaded value. // If the result of load has to be truncated, then it's not necessarily // profitable. bool BCNumEltsChanged = false; EVT ExtVT = VecVT.getVectorElementType(); EVT LVT = ExtVT; if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT)) return SDValue(); if (VecOp.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!VecOp.hasOneUse()) return SDValue(); EVT BCVT = VecOp.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); if (NumElts != BCVT.getVectorNumElements()) BCNumEltsChanged = true; VecOp = VecOp.getOperand(0); ExtVT = BCVT.getVectorElementType(); } // extract (vector load $addr), i --> load $addr + i * size if (!LegalOperations && !IndexC && VecOp.hasOneUse() && ISD::isNormalLoad(VecOp.getNode()) && !Index->hasPredecessor(VecOp.getNode())) { auto *VecLoad = dyn_cast(VecOp); if (VecLoad && VecLoad->isSimple()) return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); } // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations || !IndexC) return SDValue(); // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) int Elt = IndexC->getZExtValue(); LoadSDNode *LN0 = nullptr; if (ISD::isNormalLoad(VecOp.getNode())) { LN0 = cast(VecOp); } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR && VecOp.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(VecOp.getOperand(0).getNode())) { // Don't duplicate a load with other uses. if (!VecOp.hasOneUse()) return SDValue(); LN0 = cast(VecOp.getOperand(0)); } if (auto *Shuf = dyn_cast(VecOp)) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) // Don't duplicate a load with other uses. if (!VecOp.hasOneUse()) return SDValue(); // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) return SDValue(); // Select the input vector, guarding against out of range extract vector. int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt); VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1); if (VecOp.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. 
if (!VecOp.hasOneUse()) return SDValue(); VecOp = VecOp.getOperand(0); } if (ISD::isNormalLoad(VecOp.getNode())) { LN0 = cast(VecOp); Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts; Index = DAG.getConstant(Elt, DL, Index.getValueType()); } } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged && VecVT.getVectorElementType() == ScalarVT && (!LegalTypes || TLI.isTypeLegal( VecOp.getOperand(0).getValueType().getVectorElementType()))) { // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0 // -> extract_vector_elt a, 0 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1 // -> extract_vector_elt a, 1 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2 // -> extract_vector_elt b, 0 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3 // -> extract_vector_elt b, 1 SDLoc SL(N); EVT ConcatVT = VecOp.getOperand(0).getValueType(); unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL, Index.getValueType()); SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts); SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ConcatVT.getVectorElementType(), ConcatOp, NewIdx); return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt); } // Make sure we found a non-volatile load and the extractelement is // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. if (Elt == -1) return DAG.getUNDEF(LVT); return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0); } // Simplify (build_vec (ext )) to (bitcast (build_vec )) SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { // We perform this optimization post type-legalization because // the type-legalizer often scalarizes integer-promoted vectors. // Performing this optimization before may create bit-casts which // will be type-legalized to complex code sequences. // We perform this optimization only before the operation legalizer because we // may introduce illegal operations. if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) return SDValue(); unsigned NumInScalars = N->getNumOperands(); SDLoc DL(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values // which come from any_extend or zero_extend nodes. If so, we can create // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR // optimizations. We do not handle sign-extend because we can't fill the sign // using shuffles. EVT SourceType = MVT::Other; bool AllAnyExt = true; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. if (In.isUndef()) continue; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; // Abort if the element is not an extension. if (!ZeroExt && !AnyExt) { SourceType = MVT::Other; break; } // The input is a ZeroExt or AnyExt. Check the original type. EVT InTy = In.getOperand(0).getValueType(); // Check that all of the widened source types are the same. if (SourceType == MVT::Other) // First time. SourceType = InTy; else if (InTy != SourceType) { // Multiple income types. Abort. SourceType = MVT::Other; break; } // Check if all of the extends are ANY_EXTENDs. AllAnyExt &= AnyExt; } // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. 
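// A minimal standalone sketch (not taken from this file; the helper name is
// hypothetical) of why the zext build_vector can become a bitcast of a wider
// build_vector: on a little-endian target, each zero-extended i8 landing at
// index I of a v4i32 occupies the same bytes as the source byte placed at
// index I * ElemRatio of a v16i8 with zero filler elements.
#include <cstdint>
#include <cstring>

static bool zextBuildVectorMatchesWideBuildVector(const uint8_t (&Src)[4]) {
  const unsigned ElemRatio = 4; // i32 size / i8 size
  // build_vector of (zext i8 -> i32), laid out in memory.
  uint32_t Wide[4];
  for (unsigned I = 0; I != 4; ++I)
    Wide[I] = Src[I];
  // Equivalent v16i8 build_vector: source byte at I * ElemRatio, zero filler.
  uint8_t Bytes[16] = {};
  for (unsigned I = 0; I != 4; ++I)
    Bytes[I * ElemRatio] = Src[I]; // big-endian would use I * ElemRatio + 3
  return std::memcmp(Wide, Bytes, sizeof(Wide)) == 0; // little-endian host
}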
EVT OutScalarTy = VT.getScalarType(); bool ValidTypes = SourceType != MVT::Other && isPowerOf2_32(OutScalarTy.getSizeInBits()) && isPowerOf2_32(SourceType.getSizeInBits()); // Create a new simpler BUILD_VECTOR sequence which other optimizations can // turn into a single shuffle instruction. if (!ValidTypes) return SDValue(); // If we already have a splat buildvector, then don't fold it if it means // introducing zeros. if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true)) return SDValue(); bool isLE = DAG.getDataLayout().isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): DAG.getConstant(0, DL, SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector Ops(NewBVElems, Filler); // Populate the new build_vector for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Cast = N->getOperand(i); assert((Cast.getOpcode() == ISD::ANY_EXTEND || Cast.getOpcode() == ISD::ZERO_EXTEND || Cast.isUndef()) && "Invalid cast opcode"); SDValue In; if (Cast.isUndef()) In = DAG.getUNDEF(SourceType); else In = Cast->getOperand(0); unsigned Index = isLE ? (i * ElemRatio) : (i * ElemRatio + (ElemRatio - 1)); assert(Index < Ops.size() && "Invalid index"); Ops[Index] = In; } // The type of the new BUILD_VECTOR node. EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); assert(VecVT.getSizeInBits() == VT.getSizeInBits() && "Invalid vector size"); // Check if the new vector type is legal. if (!isTypeLegal(VecVT) || (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) && TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))) return SDValue(); // Make the new BUILD_VECTOR. SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorklist(BV.getNode()); // Bitcast to the desired type. return DAG.getBitcast(VT, BV); } // Simplify (build_vec (trunc $1) // (trunc (srl $1 half-width)) // (trunc (srl $1 (2 * half-width))) …) // to (bitcast $1) SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); // Only for little endian if (!DAG.getDataLayout().isLittleEndian()) return SDValue(); SDLoc DL(N); EVT VT = N->getValueType(0); EVT OutScalarTy = VT.getScalarType(); uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits(); // Only for power of two types to be sure that bitcast works well if (!isPowerOf2_64(ScalarTypeBitsize)) return SDValue(); unsigned NumInScalars = N->getNumOperands(); // Look through bitcasts auto PeekThroughBitcast = [](SDValue Op) { if (Op.getOpcode() == ISD::BITCAST) return Op.getOperand(0); return Op; }; // The source value where all the parts are extracted. SDValue Src; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = PeekThroughBitcast(N->getOperand(i)); // Ignore undef inputs. if (In.isUndef()) continue; if (In.getOpcode() != ISD::TRUNCATE) return SDValue(); In = PeekThroughBitcast(In.getOperand(0)); if (In.getOpcode() != ISD::SRL) { // For now only build_vec without shuffling, handle shifts here in the // future. 
if (i != 0) return SDValue(); Src = In; } else { // In is SRL SDValue part = PeekThroughBitcast(In.getOperand(0)); if (!Src) { Src = part; } else if (Src != part) { // Vector parts do not stem from the same variable return SDValue(); } SDValue ShiftAmtVal = In.getOperand(1); if (!isa(ShiftAmtVal)) return SDValue(); uint64_t ShiftAmt = In.getConstantOperandVal(1); // The extracted value is not extracted at the right position if (ShiftAmt != i * ScalarTypeBitsize) return SDValue(); } } // Only cast if the size is the same if (Src.getValueType().getSizeInBits() != VT.getSizeInBits()) return SDValue(); return DAG.getBitcast(VT, Src); } SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx, bool DidSplitVec) { SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL); EVT VT = N->getValueType(0); EVT InVT1 = VecIn1.getValueType(); EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; unsigned NumElems = VT.getVectorNumElements(); unsigned ShuffleNumElems = NumElems; // If we artificially split a vector in two already, then the offsets in the // operands will all be based off of VecIn1, even those in VecIn2. unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements(); uint64_t VTSize = VT.getFixedSizeInBits(); uint64_t InVT1Size = InVT1.getFixedSizeInBits(); uint64_t InVT2Size = InVT2.getFixedSizeInBits(); assert(InVT2Size <= InVT1Size && "Inputs must be sorted to be in non-increasing vector size order."); // We can't generate a shuffle node with mismatched input and output types. // Try to make the types match the type of the output. if (InVT1 != VT || InVT2 != VT) { if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) { // If the output vector length is a multiple of both input lengths, // we can concatenate them and pad the rest with undefs. unsigned NumConcats = VTSize / InVT1Size; assert(NumConcats >= 2 && "Concat needs at least two inputs!"); SmallVector ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); ConcatOps[0] = VecIn1; ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1); VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); VecIn2 = SDValue(); } else if (InVT1Size == VTSize * 2) { if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems)) return SDValue(); if (!VecIn2.getNode()) { // If we only have one input vector, and it's twice the size of the // output, split it in two. VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, DAG.getVectorIdxConstant(NumElems, DL)); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); // Since we now have shorter input vectors, adjust the offset of the // second vector's start. Vec2Offset = NumElems; } else { assert(InVT2Size <= InVT1Size && "Second input is not going to be larger than the first one."); // VecIn1 is wider than the output, and we have another, possibly // smaller input. Pad the smaller input with undefs, shuffle at the // input vector width, and extract the output. // The shuffle type is different than VT, so check legality again. if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) return SDValue(); // Legalizing INSERT_SUBVECTOR is tricky - you basically have to // lower it back into a BUILD_VECTOR. So if the inserted type is // illegal, don't even try. 
if (InVT1 != InVT2) { if (!TLI.isTypeLegal(InVT2)) return SDValue(); VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); } ShuffleNumElems = NumElems * 2; } } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) { SmallVector ConcatOps(2, DAG.getUNDEF(InVT2)); ConcatOps[0] = VecIn2; VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); } else { // TODO: Support cases where the length mismatch isn't exactly by a // factor of 2. // TODO: Move this check upwards, so that if we have bad type // mismatches, we don't create any DAG nodes. return SDValue(); } } // Initialize mask to undef. SmallVector Mask(ShuffleNumElems, -1); // Only need to run up to the number of elements actually used, not the // total number of elements in the shuffle - if we are shuffling a wider // vector, the high lanes should be set to undef. for (unsigned i = 0; i != NumElems; ++i) { if (VectorMask[i] <= 0) continue; unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1); if (VectorMask[i] == (int)LeftIdx) { Mask[i] = ExtIndex; } else if (VectorMask[i] == (int)LeftIdx + 1) { Mask[i] = Vec2Offset + ExtIndex; } } // The type the input vectors may have changed above. InVT1 = VecIn1.getValueType(); // If we already have a VecIn2, it should have the same type as VecIn1. // If we don't, get an undef/zero vector of the appropriate type. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1); assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type."); SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask); if (ShuffleNumElems > NumElems) Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx); return Shuffle; } static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); // First, determine where the build vector is not undef. // TODO: We could extend this to handle zero elements as well as undefs. int NumBVOps = BV->getNumOperands(); int ZextElt = -1; for (int i = 0; i != NumBVOps; ++i) { SDValue Op = BV->getOperand(i); if (Op.isUndef()) continue; if (ZextElt == -1) ZextElt = i; else return SDValue(); } // Bail out if there's no non-undef element. if (ZextElt == -1) return SDValue(); // The build vector contains some number of undef elements and exactly // one other element. That other element must be a zero-extended scalar // extracted from a vector at a constant index to turn this into a shuffle. // Also, require that the build vector does not implicitly truncate/extend // its elements. // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND. EVT VT = BV->getValueType(0); SDValue Zext = BV->getOperand(ZextElt); if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() || Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa(Zext.getOperand(0).getOperand(1)) || Zext.getValueSizeInBits() != VT.getScalarSizeInBits()) return SDValue(); // The zero-extend must be a multiple of the source size, and we must be // building a vector of the same size as the source of the extract element. SDValue Extract = Zext.getOperand(0); unsigned DestSize = Zext.getValueSizeInBits(); unsigned SrcSize = Extract.getValueSizeInBits(); if (DestSize % SrcSize != 0 || Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits()) return SDValue(); // Create a shuffle mask that will combine the extracted element with zeros // and undefs. 
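// A minimal standalone sketch (std::vector stands in for the mask container;
// the helper name is hypothetical, not taken from this file) of the mask
// constructed below, with a worked example: the low part of the zext selects
// the extracted source element, the high part selects lane 0 of the zero
// vector (mask value == number of mask elements), and all other lanes are
// undef (-1).
#include <vector>

static std::vector<int> buildZextShuffleMask(int NumBVOps, int ZextRatio,
                                             int ZextElt, int ExtractIdx) {
  const int NumMaskElts = NumBVOps * ZextRatio;
  std::vector<int> Mask(NumMaskElts, -1); // undef lanes stay -1
  for (int I = 0; I != NumMaskElts; ++I)
    if (I / ZextRatio == ZextElt)
      Mask[I] = (I % ZextRatio == 0) ? ExtractIdx   // low bits: source element
                                     : NumMaskElts; // high bits: zero vector
  return Mask;
}
// e.g. buildZextShuffleMask(4, 2, /*ZextElt=*/1, /*ExtractIdx=*/3)
//      == { -1, -1, 3, 8, -1, -1, -1, -1 }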
int ZextRatio = DestSize / SrcSize; int NumMaskElts = NumBVOps * ZextRatio; SmallVector ShufMask(NumMaskElts, -1); for (int i = 0; i != NumMaskElts; ++i) { if (i / ZextRatio == ZextElt) { // The low bits of the (potentially translated) extracted element map to // the source vector. The high bits map to zero. We will use a zero vector // as the 2nd source operand of the shuffle, so use the 1st element of // that vector (mask value is number-of-elements) for the high bits. if (i % ZextRatio == 0) ShufMask[i] = Extract.getConstantOperandVal(1); else ShufMask[i] = NumMaskElts; } // Undef elements of the build vector remain undef because we initialize // the shuffle mask with -1. } // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> // bitcast (shuffle V, ZeroVec, VectorMask) SDLoc DL(BV); EVT VecVT = Extract.getOperand(0).getValueType(); SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, ShufMask, DAG); if (!Shuf) return SDValue(); return DAG.getBitcast(VT, Shuf); } // FIXME: promote to STLExtras. template static auto getFirstIndexOf(R &&Range, const T &Val) { auto I = find(Range, Val); if (I == Range.end()) return static_cast(-1); return std::distance(Range.begin(), I); } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If the types of the vectors we're extracting from allow it, // turn this into a vector_shuffle node. SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) return SDValue(); if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG)) return V; // May only combine to shuffle after legalize if shuffle is legal. if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); bool UsesZeroVector = false; unsigned NumElems = N->getNumOperands(); // Record, for each element of the newly built vector, which input vector // that element comes from. -1 stands for undef, 0 for the zero vector, // and positive values for the input vectors. // VectorMask maps each element to its vector number, and VecIn maps vector // numbers to their initial SDValues. SmallVector VectorMask(NumElems, -1); SmallVector VecIn; VecIn.push_back(SDValue()); for (unsigned i = 0; i != NumElems; ++i) { SDValue Op = N->getOperand(i); if (Op.isUndef()) continue; // See if we can use a blend with a zero vector. // TODO: Should we generalize this to a blend with an arbitrary constant // vector? if (isNullConstant(Op) || isNullFPConstant(Op)) { UsesZeroVector = true; VectorMask[i] = 0; continue; } // Not an undef or zero. If the input is something other than an // EXTRACT_VECTOR_ELT with an in-range constant index, bail out. if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa(Op.getOperand(1))) return SDValue(); SDValue ExtractedFromVec = Op.getOperand(0); if (ExtractedFromVec.getValueType().isScalableVector()) return SDValue(); const APInt &ExtractIdx = Op.getConstantOperandAPInt(1); if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements())) return SDValue(); // All inputs must have the same element type as the output. if (VT.getVectorElementType() != ExtractedFromVec.getValueType().getVectorElementType()) return SDValue(); // Have we seen this input vector before? 
// The vectors are expected to be tiny (usually 1 or 2 elements), so using // a map back from SDValues to numbers isn't worth it. int Idx = getFirstIndexOf(VecIn, ExtractedFromVec); if (Idx == -1) { // A new source vector? Idx = VecIn.size(); VecIn.push_back(ExtractedFromVec); } VectorMask[i] = Idx; } // If we didn't find at least one input vector, bail out. if (VecIn.size() < 2) return SDValue(); // If all the Operands of BUILD_VECTOR extract from same // vector, then split the vector efficiently based on the maximum // vector access index and adjust the VectorMask and // VecIn accordingly. bool DidSplitVec = false; if (VecIn.size() == 2) { unsigned MaxIndex = 0; unsigned NearestPow2 = 0; SDValue Vec = VecIn.back(); EVT InVT = Vec.getValueType(); SmallVector IndexVec(NumElems, 0); for (unsigned i = 0; i < NumElems; i++) { if (VectorMask[i] <= 0) continue; unsigned Index = N->getOperand(i).getConstantOperandVal(1); IndexVec[i] = Index; MaxIndex = std::max(MaxIndex, Index); } NearestPow2 = PowerOf2Ceil(MaxIndex); if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 && NumElems * 2 < NearestPow2) { unsigned SplitSize = NearestPow2 / 2; EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), SplitSize); if (TLI.isTypeLegal(SplitVT) && SplitSize + SplitVT.getVectorNumElements() <= InVT.getVectorNumElements()) { SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, DAG.getVectorIdxConstant(SplitSize, DL)); SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, DAG.getVectorIdxConstant(0, DL)); VecIn.pop_back(); VecIn.push_back(VecIn1); VecIn.push_back(VecIn2); DidSplitVec = true; for (unsigned i = 0; i < NumElems; i++) { if (VectorMask[i] <= 0) continue; VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2; } } } } // Sort input vectors by decreasing vector element count, // while preserving the relative order of equally-sized vectors. // Note that we keep the first "implicit zero vector as-is. SmallVector SortedVecIn(VecIn); llvm::stable_sort(MutableArrayRef(SortedVecIn).drop_front(), [](const SDValue &a, const SDValue &b) { return a.getValueType().getVectorNumElements() > b.getValueType().getVectorNumElements(); }); // We now also need to rebuild the VectorMask, because it referenced element // order in VecIn, and we just sorted them. for (int &SourceVectorIndex : VectorMask) { if (SourceVectorIndex <= 0) continue; unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]); assert(Idx > 0 && Idx < SortedVecIn.size() && VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure"); SourceVectorIndex = Idx; } VecIn = std::move(SortedVecIn); // TODO: Should this fire if some of the input vectors has illegal type (like // it does now), or should we let legalization run its course first? // Shuffle phase: // Take pairs of vectors, and shuffle them so that the result has elements // from these vectors in the correct places. // For example, given: // t10: i32 = extract_vector_elt t1, Constant:i64<0> // t11: i32 = extract_vector_elt t2, Constant:i64<0> // t12: i32 = extract_vector_elt t3, Constant:i64<0> // t13: i32 = extract_vector_elt t1, Constant:i64<1> // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13 // We will generate: // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 // t21: v4i32 = vector_shuffle t3, undef SmallVector Shuffles; for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) { unsigned LeftIdx = 2 * In + 1; SDValue VecLeft = VecIn[LeftIdx]; SDValue VecRight = (LeftIdx + 1) < VecIn.size() ? 
VecIn[LeftIdx + 1] : SDValue(); if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, VecRight, LeftIdx, DidSplitVec)) Shuffles.push_back(Shuffle); else return SDValue(); } // If we need the zero vector as an "ingredient" in the blend tree, add it // to the list of shuffles. if (UsesZeroVector) Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT) : DAG.getConstantFP(0.0, DL, VT)); // If we only have one shuffle, we're done. if (Shuffles.size() == 1) return Shuffles[0]; // Update the vector mask to point to the post-shuffle vectors. for (int &Vec : VectorMask) if (Vec == 0) Vec = Shuffles.size() - 1; else Vec = (Vec - 1) / 2; // More than one shuffle. Generate a binary tree of blends, e.g. if from // the previous step we got the set of shuffles t10, t11, t12, t13, we will // generate: // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2 // t11: v8i32 = vector_shuffle t3, t4 // t12: v8i32 = vector_shuffle t5, t6 // t13: v8i32 = vector_shuffle t7, t8 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11 // t21: v8i32 = vector_shuffle t12, t13 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21 // Make sure the initial size of the shuffle list is even. if (Shuffles.size() % 2) Shuffles.push_back(DAG.getUNDEF(VT)); for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) { if (CurSize % 2) { Shuffles[CurSize] = DAG.getUNDEF(VT); CurSize++; } for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) { int Left = 2 * In; int Right = 2 * In + 1; SmallVector Mask(NumElems, -1); SDValue L = Shuffles[Left]; ArrayRef LMask; bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE && L.use_empty() && L.getOperand(1).isUndef() && L.getOperand(0).getValueType() == L.getValueType(); if (IsLeftShuffle) { LMask = cast(L.getNode())->getMask(); L = L.getOperand(0); } SDValue R = Shuffles[Right]; ArrayRef RMask; bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE && R.use_empty() && R.getOperand(1).isUndef() && R.getOperand(0).getValueType() == R.getValueType(); if (IsRightShuffle) { RMask = cast(R.getNode())->getMask(); R = R.getOperand(0); } for (unsigned I = 0; I != NumElems; ++I) { if (VectorMask[I] == Left) { Mask[I] = I; if (IsLeftShuffle) Mask[I] = LMask[I]; VectorMask[I] = In; } else if (VectorMask[I] == Right) { Mask[I] = I + NumElems; if (IsRightShuffle) Mask[I] = RMask[I] + NumElems; VectorMask[I] = In; } } Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask); } } return Shuffles[0]; } // Try to turn a build vector of zero extends of extract vector elts into a // a vector zero extend and possibly an extract subvector. // TODO: Support sign extend? // TODO: Allow undef elements? SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { if (LegalOperations) return SDValue(); EVT VT = N->getValueType(0); bool FoundZeroExtend = false; SDValue Op0 = N->getOperand(0); auto checkElem = [&](SDValue Op) -> int64_t { unsigned Opc = Op.getOpcode(); FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND); if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0)) if (auto *C = dyn_cast(Op.getOperand(0).getOperand(1))) return C->getZExtValue(); return -1; }; // Make sure the first element matches // (zext (extract_vector_elt X, C)) // Offset must be a constant multiple of the // known-minimum vector length of the result type. 
int64_t Offset = checkElem(Op0); if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0) return SDValue(); unsigned NumElems = N->getNumOperands(); SDValue In = Op0.getOperand(0).getOperand(0); EVT InSVT = In.getValueType().getScalarType(); EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems); // Don't create an illegal input type after type legalization. if (LegalTypes && !TLI.isTypeLegal(InVT)) return SDValue(); // Ensure all the elements come from the same vector and are adjacent. for (unsigned i = 1; i != NumElems; ++i) { if ((Offset + i) != checkElem(N->getOperand(i))) return SDValue(); } SDLoc DL(N); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In, Op0.getOperand(0).getOperand(1)); return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL, VT, In); } SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); // A vector built entirely of undefs is undef. if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); // If this is a splat of a bitcast from another vector, change to a // concat_vector. // For example: // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) -> // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X)))) // // If X is a build_vector itself, the concat can become a larger build_vector. // TODO: Maybe this is useful for non-splat too? if (!LegalOperations) { if (SDValue Splat = cast(N)->getSplatValue()) { Splat = peekThroughBitcasts(Splat); EVT SrcVT = Splat.getValueType(); if (SrcVT.isVector()) { unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(), NumElts); if (!LegalTypes || TLI.isTypeLegal(NewVT)) { SmallVector Ops(N->getNumOperands(), Splat); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops); return DAG.getBitcast(VT, Concat); } } } } // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); auto checkElem = [&](SDValue Op) -> uint64_t { if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) && (Op0.getOperand(0) == Op.getOperand(0))) if (auto CNode = dyn_cast(Op.getOperand(1))) return CNode->getZExtValue(); return -1; }; int Offset = checkElem(Op0); for (unsigned i = 0; i < N->getNumOperands(); ++i) { if (Offset + i != checkElem(N->getOperand(i))) { Offset = -1; break; } } if ((Offset == 0) && (Op0.getOperand(0).getValueType() == N->getValueType(0))) return Op0.getOperand(0); if ((Offset != -1) && ((Offset % N->getValueType(0).getVectorNumElements()) == 0)) // IDX must be multiple of output size. return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), Op0.getOperand(0), Op0.getOperand(1)); } if (SDValue V = convertBuildVecZextToZext(N)) return V; if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; if (SDValue V = reduceBuildVecTruncToBitCast(N)) return V; if (SDValue V = reduceBuildVecToShuffle(N)) return V; // A splat of a single element is a SPLAT_VECTOR if supported on the target. // Do this late as some of the above may replace the splat. 
if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) if (SDValue V = cast(N)->getSplatValue()) { assert(!V.isUndef() && "Splat of undef should have been handled earlier"); return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); } return SDValue(); } static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT OpVT = N->getOperand(0).getValueType(); // If the operands are legal vectors, leave them alone. if (TLI.isTypeLegal(OpVT)) return SDValue(); SDLoc DL(N); EVT VT = N->getValueType(0); SmallVector Ops; EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); // Keep track of what we encounter. bool AnyInteger = false; bool AnyFP = false; for (const SDValue &Op : N->ops()) { if (ISD::BITCAST == Op.getOpcode() && !Op.getOperand(0).getValueType().isVector()) Ops.push_back(Op.getOperand(0)); else if (ISD::UNDEF == Op.getOpcode()) Ops.push_back(ScalarUndef); else return SDValue(); // Note whether we encounter an integer or floating point scalar. // If it's neither, bail out, it could be something weird like x86mmx. EVT LastOpVT = Ops.back().getValueType(); if (LastOpVT.isFloatingPoint()) AnyFP = true; else if (LastOpVT.isInteger()) AnyInteger = true; else return SDValue(); } // If any of the operands is a floating point scalar bitcast to a vector, // use floating point types throughout, and bitcast everything. // Replace UNDEFs by another scalar UNDEF node, of the final desired type. if (AnyFP) { SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); if (AnyInteger) { for (SDValue &Op : Ops) { if (Op.getValueType() == SVT) continue; if (Op.isUndef()) Op = ScalarUndef; else Op = DAG.getBitcast(SVT, Op); } } } EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, VT.getSizeInBits() / SVT.getSizeInBits()); return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); } // Attempt to merge nested concat_vectors/undefs. // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d)) // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d) static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types. EVT SubVT; SDValue FirstConcat; for (const SDValue &Op : N->ops()) { if (Op.isUndef()) continue; if (Op.getOpcode() != ISD::CONCAT_VECTORS) return SDValue(); if (!FirstConcat) { SubVT = Op.getOperand(0).getValueType(); if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT)) return SDValue(); FirstConcat = Op; continue; } if (SubVT != Op.getOperand(0).getValueType()) return SDValue(); } assert(FirstConcat && "Concat of all-undefs found"); SmallVector ConcatOps; for (const SDValue &Op : N->ops()) { if (Op.isUndef()) { ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT)); continue; } ConcatOps.append(Op->op_begin(), Op->op_end()); } return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps); } // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at // most two distinct vectors the same size as the result, attempt to turn this // into a legal shuffle. 
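// A minimal standalone sketch (std::vector stands in for the mask container;
// the helper name is hypothetical, not taken from this file) of the shuffle
// mask such a concat becomes when both extracts come from result-sized
// sources: lanes from the first source keep their extract offset, lanes from
// the second source are additionally offset by the result's element count.
#include <vector>

static std::vector<int> concatOfExtractsMask(unsigned NumOpElts,
                                             unsigned NumElts,
                                             unsigned ExtIdx0,
                                             unsigned ExtIdx1) {
  std::vector<int> Mask;
  for (unsigned I = 0; I != NumOpElts; ++I) // subvector extracted from SV0
    Mask.push_back(ExtIdx0 + I);
  for (unsigned I = 0; I != NumOpElts; ++I) // subvector extracted from SV1
    Mask.push_back(NumElts + ExtIdx1 + I);
  return Mask;
}
// e.g. v4i32 concat(extract(A, 2), extract(B, 0)) with v2i32 operands:
//      concatOfExtractsMask(2, 4, 2, 0) == { 2, 3, 4, 5 }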
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); EVT OpVT = N->getOperand(0).getValueType(); // We currently can't generate an appropriate shuffle for a scalable vector. if (VT.isScalableVector()) return SDValue(); int NumElts = VT.getVectorNumElements(); int NumOpElts = OpVT.getVectorNumElements(); SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); SmallVector Mask; for (SDValue Op : N->ops()) { Op = peekThroughBitcasts(Op); // UNDEF nodes convert to UNDEF shuffle mask values. if (Op.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) return SDValue(); // What vector are we extracting the subvector from and at what index? SDValue ExtVec = Op.getOperand(0); int ExtIdx = Op.getConstantOperandVal(1); // We want the EVT of the original extraction to correctly scale the // extraction index. EVT ExtVT = ExtVec.getValueType(); ExtVec = peekThroughBitcasts(ExtVec); // UNDEF nodes convert to UNDEF shuffle mask values. if (ExtVec.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } // Ensure that we are extracting a subvector from a vector the same // size as the result. if (ExtVT.getSizeInBits() != VT.getSizeInBits()) return SDValue(); // Scale the subvector index to account for any bitcast. int NumExtElts = ExtVT.getVectorNumElements(); if (0 == (NumExtElts % NumElts)) ExtIdx /= (NumExtElts / NumElts); else if (0 == (NumElts % NumExtElts)) ExtIdx *= (NumElts / NumExtElts); else return SDValue(); // At most we can reference 2 inputs in the final shuffle. if (SV0.isUndef() || SV0 == ExtVec) { SV0 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx); } else if (SV1.isUndef() || SV1 == ExtVec) { SV1 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx + NumElts); } else { return SDValue(); } } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), DAG.getBitcast(VT, SV1), Mask, DAG); } static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { unsigned CastOpcode = N->getOperand(0).getOpcode(); switch (CastOpcode) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // TODO: Allow more opcodes? // case ISD::BITCAST: // case ISD::TRUNCATE: // case ISD::ZERO_EXTEND: // case ISD::SIGN_EXTEND: // case ISD::FP_EXTEND: break; default: return SDValue(); } EVT SrcVT = N->getOperand(0).getOperand(0).getValueType(); if (!SrcVT.isVector()) return SDValue(); // All operands of the concat must be the same kind of cast from the same // source type. SmallVector SrcOps; for (SDValue Op : N->ops()) { if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() || Op.getOperand(0).getValueType() != SrcVT) return SDValue(); SrcOps.push_back(Op.getOperand(0)); } // The wider cast must be supported by the target. This is unusual because // the operation support type parameter depends on the opcode. In addition, // check the other type in the cast to make sure this is really legal. 
EVT VT = N->getValueType(0); EVT SrcEltVT = SrcVT.getVectorElementType(); ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands(); EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); switch (CastOpcode) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) || !TLI.isTypeLegal(VT)) return SDValue(); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) || !TLI.isTypeLegal(ConcatSrcVT)) return SDValue(); break; default: llvm_unreachable("Unexpected cast opcode"); } // concat (cast X), (cast Y)... -> cast (concat X, Y...) SDLoc DL(N); SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps); return DAG.getNode(CastOpcode, DL, VT, NewConcat); } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) return N->getOperand(0); // Check if all of the operands are undefs. EVT VT = N->getValueType(0); if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); // Optimize concat_vectors where all but the first of the vectors are undef. if (all_of(drop_begin(N->ops()), [](const SDValue &Op) { return Op.isUndef(); })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); // If the input is a concat_vectors, just make a larger concat by padding // with smaller undefs. if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) { unsigned NumOps = N->getNumOperands() * In.getNumOperands(); SmallVector Ops(In->op_begin(), In->op_end()); Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType())); return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } SDValue Scalar = peekThroughOneUseBitcasts(In); // concat_vectors(scalar_to_vector(scalar), undef) -> // scalar_to_vector(scalar) if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR && Scalar.hasOneUse()) { EVT SVT = Scalar.getValueType().getVectorElementType(); if (SVT == Scalar.getOperand(0).getValueType()) Scalar = Scalar.getOperand(0); } // concat_vectors(scalar, undef) -> scalar_to_vector(scalar) if (!Scalar.getValueType().isVector()) { // If the bitcast type isn't legal, it might be a trunc of a legal type; // look through the trunc so we can still do the transform: // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) if (Scalar->getOpcode() == ISD::TRUNCATE && !TLI.isTypeLegal(Scalar.getValueType()) && TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) Scalar = Scalar->getOperand(0); EVT SclTy = Scalar.getValueType(); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) return SDValue(); // Bail out if the vector size is not a multiple of the scalar size. if (VT.getSizeInBits() % SclTy.getSizeInBits()) return SDValue(); unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits(); if (VNTNumElms < 2) return SDValue(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms); if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) return SDValue(); SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar); return DAG.getBitcast(VT, Res); } } // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) 
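// Illustrative example (added commentary; the element types are assumed, not
// from the original source): before type legalization the per-operand scalar
// types of integer BUILD_VECTORs may differ, e.g.
//   concat_vectors (v2i32 build_vector (i64 A), (i64 B)),
//                  (v2i32 build_vector (i32 C), (i32 D))
// Here MinVT below ends up as i32, so A and B are wrapped in
// (truncate i32 ...) before the combined v4i32 build_vector is formed.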
auto IsBuildVectorOrUndef = [](const SDValue &Op) { return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); }; if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) { SmallVector Opnds; EVT SVT = VT.getScalarType(); EVT MinVT = SVT; if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different // operand types. Get the smallest type and truncate all operands to it. bool FoundMinVT = false; for (const SDValue &Op : N->ops()) if (ISD::BUILD_VECTOR == Op.getOpcode()) { EVT OpSVT = Op.getOperand(0).getValueType(); MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; FoundMinVT = true; } assert(FoundMinVT && "Concat vector type mismatch"); } for (const SDValue &Op : N->ops()) { EVT OpVT = Op.getValueType(); unsigned NumElts = OpVT.getVectorNumElements(); if (ISD::UNDEF == Op.getOpcode()) Opnds.append(NumElts, DAG.getUNDEF(MinVT)); if (ISD::BUILD_VECTOR == Op.getOpcode()) { if (SVT.isFloatingPoint()) { assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); } else { for (unsigned i = 0; i != NumElts; ++i) Opnds.push_back( DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); } } } assert(VT.getVectorNumElements() == Opnds.size() && "Concat vector type mismatch"); return DAG.getBuildVector(VT, SDLoc(N), Opnds); } // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...). if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE. if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG)) return V; // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) return V; } if (SDValue V = combineConcatVectorOfCasts(N, DAG)) return V; // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR // operands and look for a CONCAT operations that place the incoming vectors // at the exact same location. // // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled. SDValue SingleSource = SDValue(); unsigned PartNumElem = N->getOperand(0).getValueType().getVectorMinNumElements(); for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); if (Op.isUndef()) continue; // Check if this is the identity extract: if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) return SDValue(); // Find the single incoming vector for the extract_subvector. if (SingleSource.getNode()) { if (Op.getOperand(0) != SingleSource) return SDValue(); } else { SingleSource = Op.getOperand(0); // Check the source type is the same as the type of the result. // If not, this concat may extend the vector, so we can not // optimize it away. if (SingleSource.getValueType() != N->getValueType(0)) return SDValue(); } // Check that we are reading from the identity index. unsigned IdentityIndex = i * PartNumElem; if (Op.getConstantOperandAPInt(1) != IdentityIndex) return SDValue(); } if (SingleSource.getNode()) return SingleSource; return SDValue(); } // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find // if the subvector can be sourced for free. 
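// Illustrative example (added commentary; types and indices are assumed, not
// from the original source): with SubVT = v4i32, the helper below returns Y
// for V = (insert_subvector X, Y, 4) and Index = 4, and it returns C for
// V = (concat_vectors A, B, C, D) and Index = 8, because 8 is a multiple of
// the 4-element subvector width and selects concat operand 8 / 4 == 2.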
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) { return V.getOperand(1); } auto *IndexC = dyn_cast(Index); if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && V.getOperand(0).getValueType() == SubVT && (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) { uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements(); return V.getOperand(SubIdx); } return SDValue(); } static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = Extract->getOperand(0); unsigned BinOpcode = BinOp.getOpcode(); if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1) return SDValue(); EVT VecVT = BinOp.getValueType(); SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType()) return SDValue(); SDValue Index = Extract->getOperand(1); EVT SubVT = Extract->getValueType(0); if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations)) return SDValue(); SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT); // TODO: We could handle the case where only 1 operand is being inserted by // creating an extract of the other operand, but that requires checking // number of uses and/or costs. if (!Sub0 || !Sub1) return SDValue(); // We are inserting both operands of the wide binop only to extract back // to the narrow vector size. Eliminate all of the insert/extract: // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1, BinOp->getFlags()); } /// If we are extracting a subvector produced by a wide binary operator try /// to use a narrow binary operator and/or avoid concatenation and extraction. static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations) { // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share // some of these bailouts with other transforms. if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations)) return V; // The extract index must be a constant, so we can map it to a concat operand. auto *ExtractIndexC = dyn_cast(Extract->getOperand(1)); if (!ExtractIndexC) return SDValue(); // We are looking for an optionally bitcasted wide vector binary operator // feeding an extract subvector. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0)); unsigned BOpcode = BinOp.getOpcode(); if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1) return SDValue(); // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be // reduced to the unary fneg when it is visited, and we probably want to deal // with fneg in a target-specific way. if (BOpcode == ISD::FSUB) { auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true); if (C && C->getValueAPF().isNegZero()) return SDValue(); } // The binop must be a vector type, so we can extract some fraction of it. EVT WideBVT = BinOp.getValueType(); // The optimisations below currently assume we are dealing with fixed length // vectors. It is possible to add support for scalable vectors, but at the // moment we've done no analysis to prove whether they are profitable or not. 
if (!WideBVT.isFixedLengthVector()) return SDValue(); EVT VT = Extract->getValueType(0); unsigned ExtractIndex = ExtractIndexC->getZExtValue(); assert(ExtractIndex % VT.getVectorNumElements() == 0 && "Extract index is not a multiple of the vector length."); // Bail out if this is not a proper multiple width extraction. unsigned WideWidth = WideBVT.getSizeInBits(); unsigned NarrowWidth = VT.getSizeInBits(); if (WideWidth % NarrowWidth != 0) return SDValue(); // Bail out if we are extracting a fraction of a single operation. This can // occur because we potentially looked through a bitcast of the binop. unsigned NarrowingRatio = WideWidth / NarrowWidth; unsigned WideNumElts = WideBVT.getVectorNumElements(); if (WideNumElts % NarrowingRatio != 0) return SDValue(); // Bail out if the target does not support a narrower version of the binop. EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), WideNumElts / NarrowingRatio); if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) return SDValue(); // If extraction is cheap, we don't need to look at the binop operands // for concat ops. The narrow binop alone makes this transform profitable. // We can't just reuse the original extract index operand because we may have // bitcasted. unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements(); unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) && BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) { // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N) SDLoc DL(Extract); SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL); SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(0), NewExtIndex); SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(1), NewExtIndex); SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags()); return DAG.getBitcast(VT, NarrowBinOp); } // Only handle the case where we are doubling and then halving. A larger ratio // may require more than two narrow binops to replace the wide binop. if (NarrowingRatio != 2) return SDValue(); // TODO: The motivating case for this transform is an x86 AVX1 target. That // target has temptingly almost legal versions of bitwise logic ops in 256-bit // flavors, but no other 256-bit integer support. This could be extended to // handle any binop, but that may require fixing/adding other folds to avoid // codegen regressions. if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR) return SDValue(); // We need at least one concatenation operation of a binop operand to make // this transform worthwhile. The concat must double the input vector sizes. 
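// Illustrative example (added commentary; the opcode, types and index are
// assumed, not from the original source): assuming the legality checks above
// passed for
//   extract_subvector (and (concat_vectors X1, X2), Y), 4
// with v4i32 halves of a v8i32 binop, ConcatOpNum is 1, so SubVecL below is
// X2 while SubVecR stays empty, and the fold produces
//   and X2, (extract_subvector Y, 4)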
auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue { if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2) return V.getOperand(ConcatOpNum); return SDValue(); }; SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0))); SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1))); if (SubVecL || SubVecR) { // If a binop operand was not the result of a concat, we must extract a // half-sized operand for our new narrow binop: // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC) // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN SDLoc DL(Extract); SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL); SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL) : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(0), IndexC); SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR) : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(1), IndexC); SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); return DAG.getBitcast(VT, NarrowBinOp); } return SDValue(); } /// If we are extracting a subvector from a wide vector load, convert to a /// narrow load to eliminate the extraction: /// (extract_subvector (load wide vector)) --> (load narrow vector) static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // TODO: Add support for big-endian. The offset calculation must be adjusted. if (DAG.getDataLayout().isBigEndian()) return SDValue(); auto *Ld = dyn_cast(Extract->getOperand(0)); if (!Ld || Ld->getExtensionType() || !Ld->isSimple()) return SDValue(); // Allow targets to opt-out. EVT VT = Extract->getValueType(0); // We can only create byte sized loads. if (!VT.isByteSized()) return SDValue(); unsigned Index = Extract->getConstantOperandVal(1); unsigned NumElts = VT.getVectorMinNumElements(); // The definition of EXTRACT_SUBVECTOR states that the index must be a // multiple of the minimum number of elements in the result type. assert(Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"); // It's fine to use TypeSize here as we know the offset will not be negative. TypeSize Offset = VT.getStoreSize() * (Index / NumElts); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) return SDValue(); // The narrow load will be offset from the base address of the old load if // we are extracting from something besides index 0 (little-endian). SDLoc DL(Extract); // TODO: Use "BaseIndexOffset" to make this more effective. SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL); uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize()); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO; if (Offset.isScalable()) { MachinePointerInfo MPI = MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()); MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize); } else MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(), StoreSize); SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); DAG.makeEquivalentMemoryOrdering(Ld, NewLd); return NewLd; } /// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), /// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?), /// EXTRACT_SUBVECTOR(Op?, ?), /// Mask')) /// iff it is legal and profitable to do so. 
/// Notably, the trimmed mask
/// (containing only the elements that are extracted)
/// must reference at most two subvectors.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
                                                     SelectionDAG &DAG,
                                                     const TargetLowering &TLI,
                                                     bool LegalOperations) {
  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
         "Must only be called on EXTRACT_SUBVECTOR's");

  SDValue N0 = N->getOperand(0);

  // Only deal with non-scalable vectors.
  EVT NarrowVT = N->getValueType(0);
  EVT WideVT = N0.getValueType();
  if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
    return SDValue();

  // The operand must be a shufflevector.
  auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
  if (!WideShuffleVector)
    return SDValue();

  // The old shuffle needs to go away.
  if (!WideShuffleVector->hasOneUse())
    return SDValue();

  // And the narrow shufflevector that we'll form must be legal.
  if (LegalOperations &&
      !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
    return SDValue();

  uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
  int NumEltsExtracted = NarrowVT.getVectorNumElements();
  assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
         "Extract index is not a multiple of the output vector length.");

  int WideNumElts = WideVT.getVectorNumElements();

  SmallVector<int> NewMask;
  NewMask.reserve(NumEltsExtracted);
  SmallSetVector<std::pair<SDValue, int>, 2> DemandedSubvectors;

  // Try to decode the wide mask into a narrow mask from at most two
  // subvectors.
  for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
                                                  NumEltsExtracted)) {
    assert((M >= -1) && (M < (2 * WideNumElts)) &&
           "Out-of-bounds shuffle mask?");

    if (M < 0) {
      // Does not depend on operands, does not require adjustment.
      NewMask.emplace_back(M);
      continue;
    }

    // From which operand of the shuffle does this shuffle mask element pick?
    int WideShufOpIdx = M / WideNumElts;
    // Which element of that operand is picked?
    int OpEltIdx = M % WideNumElts;

    assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
           "Shuffle mask vector decomposition failure.");

    // And which NumEltsExtracted-sized subvector of that operand is that?
    int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
    // And which element within that subvector of that operand is that?
    int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;

    assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
           "Shuffle mask subvector decomposition failure.");
    assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
            WideShufOpIdx * WideNumElts) == M &&
           "Shuffle mask full decomposition failure.");

    SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);

    if (Op.isUndef()) {
      // Picking from an undef operand. Let's adjust mask instead.
      NewMask.emplace_back(-1);
      continue;
    }

    // Profitability check: only deal with extractions from the first
    // subvector.
    if (OpSubvecIdx != 0)
      return SDValue();

    const std::pair<SDValue, int> DemandedSubvector =
        std::make_pair(Op, OpSubvecIdx);

    if (DemandedSubvectors.insert(DemandedSubvector)) {
      if (DemandedSubvectors.size() > 2)
        return SDValue(); // We can't handle more than two subvectors.
      // How many elements into the WideVT does this subvector start?
      int Index = NumEltsExtracted * OpSubvecIdx;
      // Bail out if the extraction isn't going to be cheap.
      if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
        return SDValue();
    }

    // Ok, but from which operand of the new shuffle will this element pick?
int NewOpIdx = getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector); assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index."); int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted; NewMask.emplace_back(AdjM); } assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask."); assert(DemandedSubvectors.size() <= 2 && "Should have ended up demanding at most two subvectors."); // Did we discover that the shuffle does not actually depend on operands? if (DemandedSubvectors.empty()) return DAG.getUNDEF(NarrowVT); // We still perform the exact same EXTRACT_SUBVECTOR, just on different // operand[s]/index[es], so there is no point in checking for it's legality. // Do not turn a legal shuffle into an illegal one. if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) && !TLI.isShuffleMaskLegal(NewMask, NarrowVT)) return SDValue(); SDLoc DL(N); SmallVector NewOps; for (const std::pair &DemandedSubvector : DemandedSubvectors) { // How many elements into the WideVT does this subvector start? int Index = NumEltsExtracted * DemandedSubvector.second; SDValue IndexC = DAG.getVectorIdxConstant(Index, DL); NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, DemandedSubvector.first, IndexC)); } assert((NewOps.size() == 1 || NewOps.size() == 2) && "Should end up with either one or two ops"); // If we ended up with only one operand, pad with an undef. if (NewOps.size() == 1) NewOps.emplace_back(DAG.getUNDEF(NarrowVT)); return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask); } SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); uint64_t ExtIdx = N->getConstantOperandVal(1); // Extract from UNDEF is UNDEF. if (V.isUndef()) return DAG.getUNDEF(NVT); if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) return NarrowLoad; // Combine an extract of an extract into a single extract_subvector. 
// ext (ext X, C), 0 --> ext X, C if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) { if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(), V.getConstantOperandVal(1)) && TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0), V.getOperand(1)); } } // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V) if (V.getOpcode() == ISD::SPLAT_VECTOR) if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse()) if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0)); // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') if (V.getOpcode() == ISD::BITCAST && V.getOperand(0).getValueType().isVector() && (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) { SDValue SrcOp = V.getOperand(0); EVT SrcVT = SrcOp.getValueType(); unsigned SrcNumElts = SrcVT.getVectorMinNumElements(); unsigned DestNumElts = V.getValueType().getVectorMinNumElements(); if ((SrcNumElts % DestNumElts) == 0) { unsigned SrcDestRatio = SrcNumElts / DestNumElts; ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC); if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { SDLoc DL(N); SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, V.getOperand(0), NewIndex); return DAG.getBitcast(NVT, NewExtract); } } if ((DestNumElts % SrcNumElts) == 0) { unsigned DestSrcRatio = DestNumElts / SrcNumElts; if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) { ElementCount NewExtEC = NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio); EVT ScalarVT = SrcVT.getScalarType(); if ((ExtIdx % DestSrcRatio) == 0) { SDLoc DL(N); unsigned IndexValScaled = ExtIdx / DestSrcRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC); if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, V.getOperand(0), NewIndex); return DAG.getBitcast(NVT, NewExtract); } if (NewExtEC.isScalar() && TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) { SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, V.getOperand(0), NewIndex); return DAG.getBitcast(NVT, NewExtract); } } } } } if (V.getOpcode() == ISD::CONCAT_VECTORS) { unsigned ExtNumElts = NVT.getVectorMinNumElements(); EVT ConcatSrcVT = V.getOperand(0).getValueType(); assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() && "Concat and extract subvector do not change element type"); assert((ExtIdx % ExtNumElts) == 0 && "Extract index is not a multiple of the input vector length."); unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements(); unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts; // If the concatenated source types match this extract, it's a direct // simplification: // extract_subvec (concat V1, V2, ...), i --> Vi if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount()) return V.getOperand(ConcatOpIdx); // If the concatenated source vectors are a multiple length of this extract, // then 
extract a fraction of one of those source vectors directly from a // concat operand. Example: // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> // v2i8 extract_subvec v8i8 Y, 6 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { SDLoc DL(N); unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && "Trying to extract from >1 concat operand?"); assert(NewExtIdx % ExtNumElts == 0 && "Extract index is not a multiple of the input vector length."); SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(ConcatOpIdx), NewIndexC); } } if (SDValue V = foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations)) return V; V = peekThroughBitcasts(V); // If the input is a build vector. Try to make a smaller build vector. if (V.getOpcode() == ISD::BUILD_VECTOR) { EVT InVT = V.getValueType(); unsigned ExtractSize = NVT.getSizeInBits(); unsigned EltSize = InVT.getScalarSizeInBits(); // Only do this if we won't split any elements. if (ExtractSize % EltSize == 0) { unsigned NumElems = ExtractSize / EltSize; EVT EltVT = InVT.getVectorElementType(); EVT ExtractVT = NumElems == 1 ? EltVT : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); if ((Level < AfterLegalizeDAG || (NumElems == 1 || TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize; if (NumElems == 1) { SDValue Src = V->getOperand(IdxVal); if (EltVT != Src.getValueType()) Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); return DAG.getBitcast(NVT, Src); } // Extract the pieces from the original build_vector. SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems)); return DAG.getBitcast(NVT, BuildVec); } } } if (V.getOpcode() == ISD::INSERT_SUBVECTOR) { // Handle only simple case where vector being inserted and vector // being extracted are of same size. EVT SmallVT = V.getOperand(1).getValueType(); if (!NVT.bitsEq(SmallVT)) return SDValue(); // Combine: // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) // Into: // indices are equal or bit offsets are equal => V1 // otherwise => (extract_subvec V1, ExtIdx) uint64_t InsIdx = V.getConstantOperandVal(2); if (InsIdx * SmallVT.getScalarSizeInBits() == ExtIdx * NVT.getScalarSizeInBits()) { if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT)) return SDValue(); return DAG.getBitcast(NVT, V.getOperand(1)); } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), N->getOperand(1)); } if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations)) return NarrowBOp; if (SimplifyDemandedVectorElts(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles /// followed by concatenation. Narrow vector ops may have better performance /// than wide ops, and this can unlock further narrowing of other vector ops. /// Targets can invert this transform later if it is not profitable. 
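/// Illustrative example (added commentary; types and mask values are assumed,
/// not from the original source): for a v8i32 shuffle
///   shuffle (concat X, undef), (concat Y, undef), <0, 8, 1, 9, u, u, u, u>
/// the halved masks become Mask0 = <0, 4, 1, 5> and Mask1 = <u, u, u, u>
/// (operand-1 indices are re-biased for the narrower v4i32 inputs), so the
/// result is
///   concat (shuffle X, Y, <0, 4, 1, 5>), (shuffle X, Y, <u, u, u, u>)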
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1); if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef()) return SDValue(); // Split the wide shuffle mask into halves. Any mask element that is accessing // operand 1 is offset down to account for narrowing of the vectors. ArrayRef Mask = Shuf->getMask(); EVT VT = Shuf->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); unsigned HalfNumElts = NumElts / 2; SmallVector Mask0(HalfNumElts, -1); SmallVector Mask1(HalfNumElts, -1); for (unsigned i = 0; i != NumElts; ++i) { if (Mask[i] == -1) continue; // If we reference the upper (undef) subvector then the element is undef. if ((Mask[i] % NumElts) >= HalfNumElts) continue; int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts; if (i < HalfNumElts) Mask0[i] = M; else Mask1[i - HalfNumElts] = M; } // Ask the target if this is a valid transform. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElts); if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) || !TLI.isShuffleMaskLegal(Mask1, HalfVT)) return SDValue(); // shuffle (concat X, undef), (concat Y, undef), Mask --> // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1) SDValue X = N0.getOperand(0), Y = N1.getOperand(0); SDLoc DL(Shuf); SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0); SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1); } // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ShuffleVectorSDNode *SVN = cast(N); ArrayRef Mask = SVN->getMask(); SmallVector Ops; EVT ConcatVT = N0.getOperand(0).getValueType(); unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); unsigned NumConcats = NumElts / NumElemsPerConcat; auto IsUndefMaskElt = [](int i) { return i == -1; }; // Special case: shuffle(concat(A,B)) can be more efficiently represented // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high // half vector elements. if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() && llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat), IsUndefMaskElt)) { N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), Mask.slice(0, NumElemsPerConcat)); N1 = DAG.getUNDEF(ConcatVT); return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); } // Look at every vector that's inserted. We're looking for exact // subvector-sized copies from a concatenated vector for (unsigned I = 0; I != NumConcats; ++I) { unsigned Begin = I * NumElemsPerConcat; ArrayRef SubMask = Mask.slice(Begin, NumElemsPerConcat); // Make sure we're dealing with a copy. 
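// Illustrative example (added commentary; the values are assumed, not from
// the original source): with 2-element concat operands, SubMask = <6, 7>
// satisfies (SubMask[i] % NumElemsPerConcat) == i for both lanes and maps
// every lane to EltOpIdx = 3, so this span is an exact copy of concat
// operand 3 (counting N0's operands first, then N1's).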
if (llvm::all_of(SubMask, IsUndefMaskElt)) { Ops.push_back(DAG.getUNDEF(ConcatVT)); continue; } int OpIdx = -1; for (int i = 0; i != (int)NumElemsPerConcat; ++i) { if (IsUndefMaskElt(SubMask[i])) continue; if ((SubMask[i] % (int)NumElemsPerConcat) != i) return SDValue(); int EltOpIdx = SubMask[i] / NumElemsPerConcat; if (0 <= OpIdx && EltOpIdx != OpIdx) return SDValue(); OpIdx = EltOpIdx; } assert(0 <= OpIdx && "Unknown concat_vectors op"); if (OpIdx < (int)N0.getNumOperands()) Ops.push_back(N0.getOperand(OpIdx)); else Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands())); } return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. // // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always // a simplification in some sense, but it isn't appropriate in general: some // BUILD_VECTORs are substantially cheaper than others. The general case // of a BUILD_VECTOR requires inserting each element individually (or // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of // all constants is a single constant pool load. A BUILD_VECTOR where each // element is identical is a splat. A BUILD_VECTOR where most of the operands // are undef lowers to a small number of element insertions. // // To deal with this, we currently use a bunch of mostly arbitrary heuristics. // We don't fold shuffles where one side is a non-zero constant, and we don't // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate // non-constant operands. This seems to work out reasonably well in practice. static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT = SVN->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = SVN->getOperand(0); SDValue N1 = SVN->getOperand(1); if (!N0->hasOneUse()) return SDValue(); // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as // discussed above. if (!N1.isUndef()) { if (!N1->hasOneUse()) return SDValue(); bool N0AnyConst = isAnyConstantBuildVector(N0); bool N1AnyConst = isAnyConstantBuildVector(N1); if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode())) return SDValue(); if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode())) return SDValue(); } // If both inputs are splats of the same value then we can safely merge this // to a single BUILD_VECTOR with undef elements based on the shuffle mask. bool IsSplat = false; auto *BV0 = dyn_cast(N0); auto *BV1 = dyn_cast(N1); if (BV0 && BV1) if (SDValue Splat0 = BV0->getSplatValue()) IsSplat = (Splat0 == BV1->getSplatValue()); SmallVector Ops; SmallSet DuplicateOps; for (int M : SVN->getMask()) { SDValue Op = DAG.getUNDEF(VT.getScalarType()); if (M >= 0) { int Idx = M < (int)NumElts ? M : M - NumElts; SDValue &S = (M < (int)NumElts ? N0 : N1); if (S.getOpcode() == ISD::BUILD_VECTOR) { Op = S.getOperand(Idx); } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { SDValue Op0 = S.getOperand(0); Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType()); } else { // Operand can't be combined - bail out. return SDValue(); } } // Don't duplicate a non-constant BUILD_VECTOR operand unless we're // generating a splat; semantically, this is fine, but it's likely to // generate low-quality code if the target can't reconstruct an appropriate // shuffle. 
if (!Op.isUndef() && !isIntOrFPConstant(Op)) if (!IsSplat && !DuplicateOps.insert(Op).second) return SDValue(); Ops.push_back(Op); } // BUILD_VECTOR requires all inputs to be of the same type, find the // maximum type and extend them all. EVT SVT = VT.getScalarType(); if (SVT.isInteger()) for (SDValue &Op : Ops) SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); if (SVT != VT.getScalarType()) for (SDValue &Op : Ops) Op = Op.isUndef() ? DAG.getUNDEF(SVT) : (TLI.isZExtFree(Op.getValueType(), SVT) ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT)); return DAG.getBuildVector(VT, SDLoc(SVN), Ops); } // Match shuffles that can be converted to any_vector_extend_in_reg. // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); // TODO Add support for big-endian when we have a test case. if (!VT.isInteger() || IsBigEndian) return SDValue(); unsigned NumElts = VT.getVectorNumElements(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); ArrayRef Mask = SVN->getMask(); SDValue N0 = SVN->getOperand(0); // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32)) auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) { for (unsigned i = 0; i != NumElts; ++i) { if (Mask[i] < 0) continue; if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale)) continue; return false; } return true; }; // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for // power-of-2 extensions as they are the most likely. for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { // Check for non power of 2 vector sizes if (NumElts % Scale != 0) continue; if (!isAnyExtend(Scale)) continue; EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); // Never create an illegal type. Only create unsupported operations if we // are pre-legalization. if (TLI.isTypeLegal(OutVT)) if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) return DAG.getBitcast(VT, DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, SDLoc(SVN), OutVT, N0)); } return SDValue(); } // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of // each source element of a large type into the lowest elements of a smaller // destination type. This is often generated during legalization. // If the source node itself was a '*_extend_vector_inreg' node then we should // then be able to remove it. static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); // TODO Add support for big-endian when we have a test case. 
if (!VT.isInteger() || IsBigEndian) return SDValue(); SDValue N0 = peekThroughBitcasts(SVN->getOperand(0)); unsigned Opcode = N0.getOpcode(); if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && Opcode != ISD::SIGN_EXTEND_VECTOR_INREG && Opcode != ISD::ZERO_EXTEND_VECTOR_INREG) return SDValue(); SDValue N00 = N0.getOperand(0); ArrayRef Mask = SVN->getMask(); unsigned NumElts = VT.getVectorNumElements(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits(); if (ExtDstSizeInBits % ExtSrcSizeInBits != 0) return SDValue(); unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits; // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1> auto isTruncate = [&Mask, &NumElts](unsigned Scale) { for (unsigned i = 0; i != NumElts; ++i) { if (Mask[i] < 0) continue; if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale)) continue; return false; } return true; }; // At the moment we just handle the case where we've truncated back to the // same size as before the extension. // TODO: handle more extension/truncation cases as cases arise. if (EltSizeInBits != ExtSrcSizeInBits) return SDValue(); // We can remove *extend_vector_inreg only if the truncation happens at // the same scale as the extension. if (isTruncate(ExtScale)) return DAG.getBitcast(VT, N00); return SDValue(); } // Combine shuffles of splat-shuffles of the form: // shuffle (shuffle V, undef, splat-mask), undef, M // If splat-mask contains undef elements, we need to be careful about // introducing undef's in the folded mask which are not the result of composing // the masks of the shuffles. static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { if (!Shuf->getOperand(1).isUndef()) return SDValue(); // If the inner operand is a known splat with no undefs, just return that directly. // TODO: Create DemandedElts mask from Shuf's mask. // TODO: Allow undef elements and merge with the shuffle code below. if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false)) return Shuf->getOperand(0); auto *Splat = dyn_cast(Shuf->getOperand(0)); if (!Splat || !Splat->isSplat()) return SDValue(); ArrayRef ShufMask = Shuf->getMask(); ArrayRef SplatMask = Splat->getMask(); assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch"); // Prefer simplifying to the splat-shuffle, if possible. This is legal if // every undef mask element in the splat-shuffle has a corresponding undef // element in the user-shuffle's mask or if the composition of mask elements // would result in undef. // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask): // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u] // In this case it is not legal to simplify to the splat-shuffle because we // may be exposing the users of the shuffle an undef element at index 1 // which was not there before the combine. // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u] // In this case the composition of masks yields SplatMask, so it's ok to // simplify to the splat-shuffle. // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u] // In this case the composed mask includes all undef elements of SplatMask // and in addition sets element zero to undef. It is safe to simplify to // the splat-shuffle. 
auto CanSimplifyToExistingSplat = [](ArrayRef UserMask, ArrayRef SplatMask) { for (unsigned i = 0, e = UserMask.size(); i != e; ++i) if (UserMask[i] != -1 && SplatMask[i] == -1 && SplatMask[UserMask[i]] != -1) return false; return true; }; if (CanSimplifyToExistingSplat(ShufMask, SplatMask)) return Shuf->getOperand(0); // Create a new shuffle with a mask that is composed of the two shuffles' // masks. SmallVector NewMask; for (int Idx : ShufMask) NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]); return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat), Splat->getOperand(0), Splat->getOperand(1), NewMask); } // Combine shuffles of bitcasts into a shuffle of the bitcast type, providing // the mask can be treated as a larger type. static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { SDValue Op0 = SVN->getOperand(0); SDValue Op1 = SVN->getOperand(1); EVT VT = SVN->getValueType(0); if (Op0.getOpcode() != ISD::BITCAST) return SDValue(); EVT InVT = Op0.getOperand(0).getValueType(); if (!InVT.isVector() || (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST || Op1.getOperand(0).getValueType() != InVT))) return SDValue(); if (isAnyConstantBuildVector(Op0.getOperand(0)) && (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0)))) return SDValue(); int VTLanes = VT.getVectorNumElements(); int InLanes = InVT.getVectorNumElements(); if (VTLanes <= InLanes || VTLanes % InLanes != 0 || (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT))) return SDValue(); int Factor = VTLanes / InLanes; // Check that each group of lanes in the mask are either undef or make a valid // mask for the wider lane type. ArrayRef Mask = SVN->getMask(); SmallVector NewMask; if (!widenShuffleMaskElts(Factor, Mask, NewMask)) return SDValue(); if (!TLI.isShuffleMaskLegal(NewMask, InVT)) return SDValue(); // Create the new shuffle with the new mask and bitcast it back to the // original type. SDLoc DL(SVN); Op0 = Op0.getOperand(0); Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0); SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask); return DAG.getBitcast(VT, NewShuf); } /// Combine shuffle of shuffle of the form: /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG) { if (!OuterShuf->getOperand(1).isUndef()) return SDValue(); auto *InnerShuf = dyn_cast(OuterShuf->getOperand(0)); if (!InnerShuf || !InnerShuf->getOperand(1).isUndef()) return SDValue(); ArrayRef OuterMask = OuterShuf->getMask(); ArrayRef InnerMask = InnerShuf->getMask(); unsigned NumElts = OuterMask.size(); assert(NumElts == InnerMask.size() && "Mask length mismatch"); SmallVector CombinedMask(NumElts, -1); int SplatIndex = -1; for (unsigned i = 0; i != NumElts; ++i) { // Undef lanes remain undef. int OuterMaskElt = OuterMask[i]; if (OuterMaskElt == -1) continue; // Peek through the shuffle masks to get the underlying source element. int InnerMaskElt = InnerMask[OuterMaskElt]; if (InnerMaskElt == -1) continue; // Initialize the splatted element. if (SplatIndex == -1) SplatIndex = InnerMaskElt; // Non-matching index - this is not a splat. if (SplatIndex != InnerMaskElt) return SDValue(); CombinedMask[i] = InnerMaskElt; } assert((all_of(CombinedMask, [](int M) { return M == -1; }) || getSplatIndex(CombinedMask) != -1) && "Expected a splat mask"); // TODO: The transform may be a win even if the mask is not legal. 
EVT VT = OuterShuf->getValueType(0); assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types"); if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT)) return SDValue(); return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0), InnerShuf->getOperand(1), CombinedMask); } /// If the shuffle mask is taking exactly one element from the first vector /// operand and passing through all other elements from the second vector /// operand, return the index of the mask element that is choosing an element /// from the first operand. Otherwise, return -1. static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef Mask) { int MaskSize = Mask.size(); int EltFromOp0 = -1; // TODO: This does not match if there are undef elements in the shuffle mask. // Should we ignore undefs in the shuffle mask instead? The trade-off is // removing an instruction (a shuffle), but losing the knowledge that some // vector lanes are not needed. for (int i = 0; i != MaskSize; ++i) { if (Mask[i] >= 0 && Mask[i] < MaskSize) { // We're looking for a shuffle of exactly one element from operand 0. if (EltFromOp0 != -1) return -1; EltFromOp0 = i; } else if (Mask[i] != i + MaskSize) { // Nothing from operand 1 can change lanes. return -1; } } return EltFromOp0; } /// If a shuffle inserts exactly one element from a source vector operand into /// another vector operand and we can access the specified element as a scalar, /// then we can eliminate the shuffle. static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { // First, check if we are taking one element of a vector and shuffling that // element into another vector. ArrayRef Mask = Shuf->getMask(); SmallVector CommutedMask(Mask.begin(), Mask.end()); SDValue Op0 = Shuf->getOperand(0); SDValue Op1 = Shuf->getOperand(1); int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); if (ShufOp0Index == -1) { // Commute mask and check again. ShuffleVectorSDNode::commuteMask(CommutedMask); ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask); if (ShufOp0Index == -1) return SDValue(); // Commute operands to match the commuted shuffle mask. std::swap(Op0, Op1); Mask = CommutedMask; } // The shuffle inserts exactly one element from operand 0 into operand 1. // Now see if we can access that element as a scalar via a real insert element // instruction. // TODO: We can try harder to locate the element as a scalar. Examples: it // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant. assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && "Shuffle mask value must be from operand 0"); if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT) return SDValue(); auto *InsIndexC = dyn_cast(Op0.getOperand(2)); if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index]) return SDValue(); // There's an existing insertelement with constant insertion index, so we // don't need to check the legality/profitability of a replacement operation // that differs at most in the constant value. The target should be able to // lower any of those in a similar way. If not, legalization will expand this // to a scalar-to-vector plus shuffle. // // Note that the shuffle may move the scalar from the position that the insert // element used. Therefore, our new insert element occurs at the shuffle's // mask index value, not the insert's index value. 
// shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf)); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), Op1, Op0.getOperand(1), NewInsIndex); } /// If we have a unary shuffle of a shuffle, see if it can be folded away /// completely. This has the potential to lose undef knowledge because the first /// shuffle may not have an undef mask element where the second one does. So /// only call this after doing simplifications based on demanded elements. static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) { // shuf (shuf0 X, Y, Mask0), undef, Mask auto *Shuf0 = dyn_cast(Shuf->getOperand(0)); if (!Shuf0 || !Shuf->getOperand(1).isUndef()) return SDValue(); ArrayRef Mask = Shuf->getMask(); ArrayRef Mask0 = Shuf0->getMask(); for (int i = 0, e = (int)Mask.size(); i != e; ++i) { // Ignore undef elements. if (Mask[i] == -1) continue; assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value"); // Is the element of the shuffle operand chosen by this shuffle the same as // the element chosen by the shuffle operand itself? if (Mask0[Mask[i]] != Mask0[i]) return SDValue(); } // Every element of this shuffle is identical to the result of the previous // shuffle, so we can replace this value. return Shuf->getOperand(0); } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); // Canonicalize shuffle undef, undef -> undef if (N0.isUndef() && N1.isUndef()) return DAG.getUNDEF(VT); ShuffleVectorSDNode *SVN = cast(N); // Canonicalize shuffle v, v -> v, undef if (N0 == N1) return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), createUnaryMask(SVN->getMask(), NumElts)); // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. if (N0.isUndef()) return DAG.getCommutedVectorShuffle(*SVN); // Remove references to rhs if it is undef if (N1.isUndef()) { bool Changed = false; SmallVector NewMask; for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx >= (int)NumElts) { Idx = -1; Changed = true; } NewMask.push_back(Idx); } if (Changed) return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) return InsElt; // A shuffle of a single vector that is a splatted value can always be folded. if (SDValue V = combineShuffleOfSplatVal(SVN, DAG)) return V; if (SDValue V = formSplatFromShuffles(SVN, DAG)) return V; // If it is a splat, check if the argument vector is another splat or a // build_vector. 
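// Illustrative example (added commentary; the opcode and index are assumed,
// not from the original source): assuming the cost/legality checks pass, for
//   shuffle (add L, R), undef, <2, 2, 2, 2>
// the code below forms
//   scalar_to_vector (add (extelt L, 2), (extelt R, 2))
// and then splats lane 0 of that value with an all-zero shuffle mask.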
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) && TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) SDValue L = N0.getOperand(0), R = N0.getOperand(1); SDLoc DL(N); EVT EltVT = VT.getScalarType(); SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL); SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index); SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index); SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags()); SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO); SmallVector ZeroMask(VT.getVectorNumElements(), 0); return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask); } // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x) // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x) if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) && N0.hasOneUse()) { if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0) return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0)); if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT) if (auto *Idx = dyn_cast(N0.getOperand(2))) if (Idx->getAPIntValue() == SplatIndex) return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1)); } // If this is a bit convert that changes the element type of the vector but // not the number of vector elements, look through it. Be careful not to // look though conversions that change things like v4f32 to v2f64. SDNode *V = N0.getNode(); if (V->getOpcode() == ISD::BITCAST) { SDValue ConvInput = V->getOperand(0); if (ConvInput.getValueType().isVector() && ConvInput.getValueType().getVectorNumElements() == NumElts) V = ConvInput.getNode(); } if (V->getOpcode() == ISD::BUILD_VECTOR) { assert(V->getNumOperands() == NumElts && "BUILD_VECTOR has wrong number of operands"); SDValue Base; bool AllSame = true; for (unsigned i = 0; i != NumElts; ++i) { if (!V->getOperand(i).isUndef()) { Base = V->getOperand(i); break; } } // Splat of , return if (!Base.getNode()) return N0; for (unsigned i = 0; i != NumElts; ++i) { if (V->getOperand(i) != Base) { AllSame = false; break; } } // Splat of , return if (AllSame) return N0; // Canonicalize any other splat as a build_vector. SDValue Splatted = V->getOperand(SplatIndex); SmallVector Ops(NumElts, Splatted); SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops); // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. if (V->getValueType(0) != VT) NewBV = DAG.getBitcast(VT, NewBV); return NewBV; } } // Simplify source operands based on shuffle mask. if (SimplifyDemandedVectorElts(SDValue(N, 0))) return SDValue(N, 0); // This is intentionally placed after demanded elements simplification because // it could eliminate knowledge of undef elements created by this shuffle. if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN)) return ShufOp; // Match shuffles that can be converted to any_vector_extend_in_reg. if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) return V; // Combine "truncate_vector_in_reg" style shuffles. 
if (SDValue V = combineTruncationShuffle(SVN, DAG)) return V; if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && (N1.isUndef() || (N1.getOpcode() == ISD::CONCAT_VECTORS && N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { if (SDValue V = partitionShuffleOfConcats(N, DAG)) return V; } // A shuffle of a concat of the same narrow vector can be reduced to use // only low-half elements of a concat with undef: // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask' if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() && N0.getNumOperands() == 2 && N0.getOperand(0) == N0.getOperand(1)) { int HalfNumElts = (int)NumElts / 2; SmallVector NewMask; for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx >= HalfNumElts) { assert(Idx < (int)NumElts && "Shuffle mask chooses undef op"); Idx -= HalfNumElts; } NewMask.push_back(Idx); } if (TLI.isShuffleMaskLegal(NewMask, VT)) { SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType()); SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0), UndefVec); return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask); } } // See if we can replace a shuffle with an insert_subvector. // e.g. v2i32 into v8i32: // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7). // --> insert_subvector(lhs,rhs1,4). if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) && TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) { auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef Mask) { // Ensure RHS subvectors are legal. assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors"); EVT SubVT = RHS.getOperand(0).getValueType(); int NumSubVecs = RHS.getNumOperands(); int NumSubElts = SubVT.getVectorNumElements(); assert((NumElts % NumSubElts) == 0 && "Subvector mismatch"); if (!TLI.isTypeLegal(SubVT)) return SDValue(); // Don't bother if we have an unary shuffle (matches undef + LHS elts). if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; })) return SDValue(); // Search [NumSubElts] spans for RHS sequence. // TODO: Can we avoid nested loops to increase performance? SmallVector InsertionMask(NumElts); for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) { for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) { // Reset mask to identity. std::iota(InsertionMask.begin(), InsertionMask.end(), 0); // Add subvector insertion. std::iota(InsertionMask.begin() + SubIdx, InsertionMask.begin() + SubIdx + NumSubElts, NumElts + (SubVec * NumSubElts)); // See if the shuffle mask matches the reference insertion mask. 
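// Illustrative example (added commentary; the values are assumed, not from
// the original source): with NumElts = 8, NumSubElts = 2, SubVec = 1 and
// SubIdx = 4, the reference InsertionMask is
//   <0, 1, 2, 3, 10, 11, 6, 7>
// i.e. identity everywhere except lanes 4-5, which take RHS subvector 1
// (wide elements NumElts + 2 and NumElts + 3).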
bool MatchingShuffle = true; for (int i = 0; i != (int)NumElts; ++i) { int ExpectIdx = InsertionMask[i]; int ActualIdx = Mask[i]; if (0 <= ActualIdx && ExpectIdx != ActualIdx) { MatchingShuffle = false; break; } } if (MatchingShuffle) return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS, RHS.getOperand(SubVec), DAG.getVectorIdxConstant(SubIdx, SDLoc(N))); } } return SDValue(); }; ArrayRef Mask = SVN->getMask(); if (N1.getOpcode() == ISD::CONCAT_VECTORS) if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask)) return InsertN1; if (N0.getOpcode() == ISD::CONCAT_VECTORS) { SmallVector CommuteMask(Mask.begin(), Mask.end()); ShuffleVectorSDNode::commuteMask(CommuteMask); if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask)) return InsertN0; } } // If we're not performing a select/blend shuffle, see if we can convert the // shuffle into a AND node, with all the out-of-lane elements are known zero. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { bool IsInLaneMask = true; ArrayRef Mask = SVN->getMask(); SmallVector ClearMask(NumElts, -1); APInt DemandedLHS = APInt::getNullValue(NumElts); APInt DemandedRHS = APInt::getNullValue(NumElts); for (int I = 0; I != (int)NumElts; ++I) { int M = Mask[I]; if (M < 0) continue; ClearMask[I] = M == I ? I : (I + NumElts); IsInLaneMask &= (M == I) || (M == (int)(I + NumElts)); if (M != I) { APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS; Demanded.setBit(M % NumElts); } } // TODO: Should we try to mask with N1 as well? if (!IsInLaneMask && (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) && (DemandedLHS.isNullValue() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) && (DemandedRHS.isNullValue() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) { SDLoc DL(N); EVT IntVT = VT.changeVectorElementTypeToInteger(); EVT IntSVT = VT.getVectorElementType().changeTypeToInteger(); // Transform the type to a legal type so that the buildvector constant // elements are not illegal. Make sure that the result is larger than the // original type, incase the value is split into two (eg i64->i32). if (!TLI.isTypeLegal(IntSVT) && LegalTypes) IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT); if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) { SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); SmallVector AndMask(NumElts, DAG.getUNDEF(IntSVT)); for (int I = 0; I != (int)NumElts; ++I) if (0 <= Mask[I]) AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt; // See if a clear mask is legal instead of going via // XformToShuffleWithZero which loses UNDEF mask elements. if (TLI.isVectorClearMaskLegal(ClearMask, IntVT)) return DAG.getBitcast( VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0), DAG.getConstant(0, DL, IntVT), ClearMask)); if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) return DAG.getBitcast( VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0), DAG.getBuildVector(IntVT, DL, AndMask))); } } } // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI)) return Res; // If this shuffle only has a single input that is a bitcasted shuffle, // attempt to merge the 2 shuffles and suitably bitcast the inputs/output // back to their original types. 
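  // e.g. for (v8i16 shuffle (bitcast (v4i32 shuffle X, Y, <1,0,3,2>)), undef, M)
  // both masks are rescaled to the common i16 granularity (the inner mask
  // becomes <2,3,0,1,6,7,4,5>) and then composed into a single v8i16 shuffle
  // of bitcasts of X and Y.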
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N1.isUndef() && Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { SDValue BC0 = peekThroughOneUseBitcasts(N0); if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { EVT SVT = VT.getScalarType(); EVT InnerVT = BC0->getValueType(0); EVT InnerSVT = InnerVT.getScalarType(); // Determine which shuffle works with the smaller scalar type. EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT; EVT ScaleSVT = ScaleVT.getScalarType(); if (TLI.isTypeLegal(ScaleVT) && 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); // Scale the shuffle masks to the smaller scalar type. ShuffleVectorSDNode *InnerSVN = cast(BC0); SmallVector InnerMask; SmallVector OuterMask; narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask); narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask); // Merge the shuffle masks. SmallVector NewMask; for (int M : OuterMask) NewMask.push_back(M < 0 ? -1 : InnerMask[M]); // Test for shuffle mask legality over both commutations. SDValue SV0 = BC0->getOperand(0); SDValue SV1 = BC0->getOperand(1); bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); if (!LegalMask) { std::swap(SV0, SV1); ShuffleVectorSDNode::commuteMask(NewMask); LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); } if (LegalMask) { SV0 = DAG.getBitcast(ScaleVT, SV0); SV1 = DAG.getBitcast(ScaleVT, SV1); return DAG.getBitcast( VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); } } } } // Match shuffles of bitcasts, so long as the mask can be treated as the // larger type. if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations)) return V; // Compute the combined shuffle mask for a shuffle with SV0 as the first // operand, and SV1 as the second operand. // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true auto MergeInnerShuffle = [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN, ShuffleVectorSDNode *OtherSVN, SDValue N1, const TargetLowering &TLI, SDValue &SV0, SDValue &SV1, SmallVectorImpl &Mask) -> bool { // Don't try to fold splats; they're likely to simplify somehow, or they // might be free. if (OtherSVN->isSplat()) return false; SV0 = SV1 = SDValue(); Mask.clear(); for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx < 0) { // Propagate Undef. Mask.push_back(Idx); continue; } if (Commute) Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts); SDValue CurrentVec; if (Idx < (int)NumElts) { // This shuffle index refers to the inner shuffle N0. Lookup the inner // shuffle mask to identify which vector is actually referenced. Idx = OtherSVN->getMaskElt(Idx); if (Idx < 0) { // Propagate Undef. Mask.push_back(Idx); continue; } CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0) : OtherSVN->getOperand(1); } else { // This shuffle index references an element within N1. CurrentVec = N1; } // Simple case where 'CurrentVec' is UNDEF. if (CurrentVec.isUndef()) { Mask.push_back(-1); continue; } // Canonicalize the shuffle index. We don't know yet if CurrentVec // will be the first or second operand of the combined shuffle. Idx = Idx % NumElts; if (!SV0.getNode() || SV0 == CurrentVec) { // Ok. CurrentVec is the left hand side. // Update the mask accordingly. 
SV0 = CurrentVec; Mask.push_back(Idx); continue; } if (!SV1.getNode() || SV1 == CurrentVec) { // Ok. CurrentVec is the right hand side. // Update the mask accordingly. SV1 = CurrentVec; Mask.push_back(Idx + NumElts); continue; } // Last chance - see if the vector is another shuffle and if it // uses one of the existing candidate shuffle ops. if (auto *CurrentSVN = dyn_cast(CurrentVec)) { int InnerIdx = CurrentSVN->getMaskElt(Idx); if (InnerIdx < 0) { Mask.push_back(-1); continue; } SDValue InnerVec = (InnerIdx < (int)NumElts) ? CurrentSVN->getOperand(0) : CurrentSVN->getOperand(1); if (InnerVec.isUndef()) { Mask.push_back(-1); continue; } InnerIdx %= NumElts; if (InnerVec == SV0) { Mask.push_back(InnerIdx); continue; } if (InnerVec == SV1) { Mask.push_back(InnerIdx + NumElts); continue; } } // Bail out if we cannot convert the shuffle pair into a single shuffle. return false; } if (llvm::all_of(Mask, [](int M) { return M < 0; })) return true; // Avoid introducing shuffles with illegal mask. // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) if (TLI.isShuffleMaskLegal(Mask, VT)) return true; std::swap(SV0, SV1); ShuffleVectorSDNode::commuteMask(Mask); return TLI.isShuffleMaskLegal(Mask, VT); }; if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::VECTOR_SHUFFLE) { // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(N1->getOperand(0).getValueType() == VT && "Shuffle types don't match"); SDValue SV0 = N1->getOperand(0); SDValue SV1 = N1->getOperand(1); bool HasSameOp0 = N0 == SV0; bool IsSV1Undef = SV1.isUndef(); if (HasSameOp0 || IsSV1Undef || N0 == SV1) // Commute the operands of this shuffle so merging below will trigger. return DAG.getCommutedVectorShuffle(*SVN); } // Canonicalize splat shuffles to the RHS to improve merging below. // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u)) if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N1.getOpcode() == ISD::VECTOR_SHUFFLE && cast(N0)->isSplat() && !cast(N1)->isSplat()) { return DAG.getCommutedVectorShuffle(*SVN); } // Try to fold according to rules: // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // Don't try to fold shuffles with illegal type. // Only fold if this shuffle is the only user of the other shuffle. // Try matching shuffle(C,shuffle(A,B)) commutted patterns as well. for (int i = 0; i != 2; ++i) { if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N->getOperand(i).getNode())) { // The incoming shuffle must be of the same type as the result of the // current shuffle. 
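        // e.g. with 4 elements, shuffle(shuffle(A, B, <0,4,1,5>), C, <0,2,4,5>)
        // reads A[0], A[1], C[0] and C[1], so it merges to
        // shuffle(A, C, <0,1,4,5>).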
auto *OtherSV = cast(N->getOperand(i)); assert(OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match"); SDValue SV0, SV1; SmallVector Mask; if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI, SV0, SV1, Mask)) { // Check if all indices in Mask are Undef. In case, propagate Undef. if (llvm::all_of(Mask, [](int M) { return M < 0; })) return DAG.getUNDEF(VT); return DAG.getVectorShuffle(VT, SDLoc(N), SV0 ? SV0 : DAG.getUNDEF(VT), SV1 ? SV1 : DAG.getUNDEF(VT), Mask); } } } // Merge shuffles through binops if we are able to merge it with at least // one other shuffles. // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef) // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d))) unsigned SrcOpcode = N0.getOpcode(); if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) && (N1.isUndef() || (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) { // Get binop source ops, or just pass on the undef. SDValue Op00 = N0.getOperand(0); SDValue Op01 = N0.getOperand(1); SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0); SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1); // TODO: We might be able to relax the VT check but we don't currently // have any isBinOp() that has different result/ops VTs so play safe until // we have test coverage. if (Op00.getValueType() == VT && Op10.getValueType() == VT && Op01.getValueType() == VT && Op11.getValueType() == VT && (Op00.getOpcode() == ISD::VECTOR_SHUFFLE || Op10.getOpcode() == ISD::VECTOR_SHUFFLE || Op01.getOpcode() == ISD::VECTOR_SHUFFLE || Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) { auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1, SmallVectorImpl &Mask, bool LeftOp, bool Commute) { SDValue InnerN = Commute ? N1 : N0; SDValue Op0 = LeftOp ? Op00 : Op01; SDValue Op1 = LeftOp ? Op10 : Op11; if (Commute) std::swap(Op0, Op1); // Only accept the merged shuffle if we don't introduce undef elements, // or the inner shuffle already contained undef elements. auto *SVN0 = dyn_cast(Op0); return SVN0 && InnerN->isOnlyUserOf(SVN0) && MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1, Mask) && (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) || llvm::none_of(Mask, [](int M) { return M < 0; })); }; // Ensure we don't increase the number of shuffles - we must merge a // shuffle from at least one of the LHS and RHS ops. bool MergedLeft = false; SDValue LeftSV0, LeftSV1; SmallVector LeftMask; if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) || CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) { MergedLeft = true; } else { LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end()); LeftSV0 = Op00, LeftSV1 = Op10; } bool MergedRight = false; SDValue RightSV0, RightSV1; SmallVector RightMask; if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) || CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) { MergedRight = true; } else { RightMask.assign(SVN->getMask().begin(), SVN->getMask().end()); RightSV0 = Op01, RightSV1 = Op11; } if (MergedLeft || MergedRight) { SDLoc DL(N); SDValue LHS = DAG.getVectorShuffle( VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT), LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask); SDValue RHS = DAG.getVectorShuffle( VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT), RightSV1 ? 
RightSV1 : DAG.getUNDEF(VT), RightMask); return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS); } } } } if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) return V; return SDValue(); } SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SDValue InVal = N->getOperand(0); EVT VT = N->getValueType(0); // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern // with a VECTOR_SHUFFLE and possible truncate. if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && VT.isFixedLengthVector() && InVal->getOperand(0).getValueType().isFixedLengthVector()) { SDValue InVec = InVal->getOperand(0); SDValue EltNo = InVal->getOperand(1); auto InVecT = InVec.getValueType(); if (ConstantSDNode *C0 = dyn_cast(EltNo)) { SmallVector NewMask(InVecT.getVectorNumElements(), -1); int Elt = C0->getZExtValue(); NewMask[0] = Elt; // If we have an implict truncate do truncate here as long as it's legal. // if it's not legal, this should if (VT.getScalarType() != InVal.getValueType() && InVal.getValueType().isScalarInteger() && isTypeLegal(VT.getScalarType())) { SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); } if (VT.getScalarType() == InVecT.getScalarType() && VT.getVectorNumElements() <= InVecT.getVectorNumElements()) { SDValue LegalShuffle = TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec, DAG.getUNDEF(InVecT), NewMask, DAG); if (LegalShuffle) { // If the initial vector is the correct size this shuffle is a // valid result. if (VT == InVecT) return LegalShuffle; // If not we must truncate the vector. if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N)); EVT SubVT = EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), VT.getVectorNumElements()); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle, ZeroIdx); } } } } } return SDValue(); } SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); uint64_t InsIdx = N->getConstantOperandVal(2); // If inserting an UNDEF, just return the original vector. if (N1.isUndef()) return N0; // If this is an insert of an extracted vector into an undef vector, we can // just use the input to the extract. if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) return N1.getOperand(0); // Simplify scalar inserts into an undef vector: // insert_subvector undef, (splat X), N2 -> splat X if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR) return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0)); // If we are inserting a bitcast value into an undef, with the same // number of elements, just use the bitcast input of the extract. // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2) if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST && N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0).getOperand(1) == N2 && N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() == VT.getVectorElementCount() && N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() == VT.getSizeInBits()) { return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); } // If both N1 and N2 are bitcast values on which insert_subvector // would makes sense, pull the bitcast through. // i.e. 
INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 -> // BITCAST (INSERT_SUBVECTOR N0 N1 N2) if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) { SDValue CN0 = N0.getOperand(0); SDValue CN1 = N1.getOperand(0); EVT CN0VT = CN0.getValueType(); EVT CN1VT = CN1.getValueType(); if (CN0VT.isVector() && CN1VT.isVector() && CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && CN0VT.getVectorElementCount() == VT.getVectorElementCount()) { SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), CN0.getValueType(), CN0, CN1, N2); return DAG.getBitcast(VT, NewINSERT); } } // Combine INSERT_SUBVECTORs where we are inserting to the same index. // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(1).getValueType() == N1.getValueType() && N0.getOperand(2) == N2) return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), N1, N2); // Eliminate an intermediate insert into an undef vector: // insert_subvector undef, (insert_subvector undef, X, 0), N2 --> // insert_subvector undef, X, N2 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR && N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2))) return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0, N1.getOperand(1), N2); // Push subvector bitcasts to the output, adjusting the index as we go. // insert_subvector(bitcast(v), bitcast(s), c1) // -> bitcast(insert_subvector(v, s, c2)) if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) && N1.getOpcode() == ISD::BITCAST) { SDValue N0Src = peekThroughBitcasts(N0); SDValue N1Src = peekThroughBitcasts(N1); EVT N0SrcSVT = N0Src.getValueType().getScalarType(); EVT N1SrcSVT = N1Src.getValueType().getScalarType(); if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) && N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) { EVT NewVT; SDLoc DL(N); SDValue NewIdx; LLVMContext &Ctx = *DAG.getContext(); ElementCount NumElts = VT.getVectorElementCount(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) { unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits(); NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale); NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL); } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) { unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits; if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) { NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts.divideCoefficientBy(Scale)); NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL); } } if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) { SDValue Res = DAG.getBitcast(NewVT, N0Src); Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx); return DAG.getBitcast(VT, Res); } } } // Canonicalize insert_subvector dag nodes. // Example: // (insert_subvector (insert_subvector A, Idx0), Idx1) // -> (insert_subvector (insert_subvector A, Idx1), Idx0) if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && N1.getValueType() == N0.getOperand(1).getValueType()) { unsigned OtherIdx = N0.getConstantOperandVal(2); if (InsIdx < OtherIdx) { // Swap nodes. 
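      // e.g. (insert_subvector (insert_subvector A, X, 4), Y, 0)
      // -> (insert_subvector (insert_subvector A, Y, 0), X, 4)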
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()), VT, NewOp,
                         N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType() &&
      N0.getOperand(0).getValueType().isScalableVector() ==
          N1.getValueType().isScalableVector()) {
    unsigned Factor = N1.getValueType().getVectorMinNumElements();
    SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
    Ops[InsIdx / Factor] = N1;
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  // Simplify source operands based on insertion.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::FP16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}

SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
                         N0.getOperand(0));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_bf16 (bf16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::BF16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}

SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N0.getValueType();
  unsigned Opcode = N->getOpcode();

  // VECREDUCE over 1-element vector is just an extract.
  if (VT.getVectorElementCount().isScalar()) {
    SDLoc dl(N);
    SDValue Res =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
                    DAG.getVectorIdxConstant(0, dl));
    if (Res.getValueType() != N->getValueType(0))
      Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
    return Res;
  }

  // On a boolean vector an and/or reduction is the same as a umin/umax
  // reduction. Convert them if the latter is legal while the former isn't.
  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
    unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND ?
ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX; if (!TLI.isOperationLegalOrCustom(Opcode, VT) && TLI.isOperationLegalOrCustom(NewOpcode, VT) && DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits()) return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0); } // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val) // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val) if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && TLI.isTypeLegal(N0.getOperand(1).getValueType())) { SDValue Vec = N0.getOperand(0); SDValue Subvec = N0.getOperand(1); if ((Opcode == ISD::VECREDUCE_OR && (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) || (Opcode == ISD::VECREDUCE_AND && (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec)))) return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec); } return SDValue(); } SDValue DAGCombiner::visitVPOp(SDNode *N) { // VP operations in which all vector elements are disabled - either by // determining that the mask is all false or that the EVL is 0 - can be // eliminated. bool AreAllEltsDisabled = false; if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode())) AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx)); if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode())) AreAllEltsDisabled |= ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode()); // This is the only generic VP combine we support for now. if (!AreAllEltsDisabled) return SDValue(); // Binary operations can be replaced by UNDEF. if (ISD::isVPBinaryOp(N->getOpcode())) return DAG.getUNDEF(N->getValueType(0)); // VP Memory operations can be replaced by either the chain (stores) or the // chain + undef (loads). if (const auto *MemSD = dyn_cast(N)) { if (MemSD->writeMem()) return MemSD->getChain(); return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain()); } // Reduction operations return the start operand when no elements are active. if (ISD::isVPReduction(N->getOpcode())) return N->getOperand(0); return SDValue(); } /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { assert(N->getOpcode() == ISD::AND && "Unexpected opcode!"); EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = peekThroughBitcasts(N->getOperand(1)); SDLoc DL(N); // Make sure we're not running after operation legalization where it // may have custom lowered the vector shuffles. if (LegalOperations) return SDValue(); if (RHS.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); EVT RVT = RHS.getValueType(); unsigned NumElts = RHS.getNumOperands(); // Attempt to create a valid clear mask, splitting the mask into // sub elements and checking to see if each is // all zeros or all ones - suitable for shuffle masking. auto BuildClearMask = [&](int Split) { int NumSubElts = NumElts * Split; int NumSubBits = RVT.getScalarSizeInBits() / Split; SmallVector Indices; for (int i = 0; i != NumSubElts; ++i) { int EltIdx = i / Split; int SubIdx = i % Split; SDValue Elt = RHS.getOperand(EltIdx); // X & undef --> 0 (not undef). So this lane must be converted to choose // from the zero constant vector (same as if the element had all 0-bits). 
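      // e.g. for (and v2i32 V, <undef, -1>) lane 0 must be taken from the
      // zero vector, giving the clear mask <2,1>.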
if (Elt.isUndef()) { Indices.push_back(i + NumSubElts); continue; } APInt Bits; if (isa(Elt)) Bits = cast(Elt)->getAPIntValue(); else if (isa(Elt)) Bits = cast(Elt)->getValueAPF().bitcastToAPInt(); else return SDValue(); // Extract the sub element from the constant bit mask. if (DAG.getDataLayout().isBigEndian()) Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits); else Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); if (Bits.isAllOnes()) Indices.push_back(i); else if (Bits == 0) Indices.push_back(i + NumSubElts); else return SDValue(); } // Let's see if the target supports this vector_shuffle. EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); SDValue Zero = DAG.getConstant(0, DL, ClearVT); return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, DAG.getBitcast(ClearVT, LHS), Zero, Indices)); }; // Determine maximum split level (byte level masking). int MaxSplit = 1; if (RVT.getScalarSizeInBits() % 8 == 0) MaxSplit = RVT.getScalarSizeInBits() / 8; for (int Split = 1; Split <= MaxSplit; ++Split) if (RVT.getScalarSizeInBits() % Split == 0) if (SDValue S = BuildClearMask(Split)) return S; return SDValue(); } /// If a vector binop is performed on splat values, it may be profitable to /// extract, scalarize, and insert/splat. static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned Opcode = N->getOpcode(); EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // TODO: Remove/replace the extract cost check? If the elements are available // as scalars, then there may be no extract cost. Should we ask if // inserting a scalar back into a vector is cheap instead? int Index0, Index1; SDValue Src0 = DAG.getSplatSourceVector(N0, Index0); SDValue Src1 = DAG.getSplatSourceVector(N1, Index1); // Extract element from splat_vector should be free. // TODO: use DAG.isSplatValue instead? bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR && N1.getOpcode() == ISD::SPLAT_VECTOR; if (!Src0 || !Src1 || Index0 != Index1 || Src0.getValueType().getVectorElementType() != EltVT || Src1.getValueType().getVectorElementType() != EltVT || !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) || !TLI.isOperationLegalOrCustom(Opcode, EltVT)) return SDValue(); SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC); SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC); SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()); // If all lanes but 1 are undefined, no need to splat the scalar result. // TODO: Keep track of undefs and use that info in the general case. if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() && count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 && count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) { // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) --> // build_vec ..undef, (bo X, Y), undef... 
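    // e.g. with Index0 == 2:
    // add (build_vector undef, undef, X, undef),
    //     (build_vector undef, undef, Y, undef)
    // --> build_vector undef, undef, (add X, Y), undef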
SmallVector Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT)); Ops[Index0] = ScalarBO; return DAG.getBuildVector(VT, DL, Ops); } // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index if (VT.isScalableVector()) return DAG.getSplatVector(VT, DL, ScalarBO); SmallVector Ops(VT.getVectorNumElements(), ScalarBO); return DAG.getBuildVector(VT, DL, Ops); } /// Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { EVT VT = N->getValueType(0); assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); unsigned Opcode = N->getOpcode(); SDNodeFlags Flags = N->getFlags(); // Move unary shuffles with identical masks after a vector binop: // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)) // --> shuffle (VBinOp A, B), Undef, Mask // This does not require type legality checks because we are creating the // same types of operations that are in the original sequence. We do have to // restrict ops like integer div that have immediate UB (eg, div-by-zero) // though. This code is adapted from the identical transform in instcombine. if (Opcode != ISD::UDIV && Opcode != ISD::SDIV && Opcode != ISD::UREM && Opcode != ISD::SREM && Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) { auto *Shuf0 = dyn_cast(LHS); auto *Shuf1 = dyn_cast(RHS); if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) && LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() && (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), RHS.getOperand(0), Flags); SDValue UndefV = LHS.getOperand(1); return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask()); } // Try to sink a splat shuffle after a binop with a uniform constant. // This is limited to cases where neither the shuffle nor the constant have // undefined elements because that could be poison-unsafe or inhibit // demanded elements analysis. It is further limited to not change a splat // of an inserted scalar because that may be optimized better by // load-folding or other target-specific behaviors. if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) && Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat X), (splat C) --> splat (binop X, C) SDValue X = Shuf0->getOperand(0); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags); return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), Shuf0->getMask()); } if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) && Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat C), (splat X) --> splat (binop C, X) SDValue X = Shuf1->getOperand(0); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags); return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), Shuf1->getMask()); } } // The following pattern is likely to emerge with vector reduction ops. 
Moving // the binary operation ahead of insertion may allow using a narrower vector // instruction that has better performance than the wide version of the op: // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() && RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() && LHS.getOperand(2) == RHS.getOperand(2) && (LHS.hasOneUse() || RHS.hasOneUse())) { SDValue X = LHS.getOperand(1); SDValue Y = RHS.getOperand(1); SDValue Z = LHS.getOperand(2); EVT NarrowVT = X.getValueType(); if (NarrowVT == Y.getValueType() && TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT, LegalOperations)) { // (binop undef, undef) may not return undef, so compute that result. SDValue VecC = DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT)); SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z); } } // Make sure all but the first op are undef or constant. auto ConcatWithConstantOrUndef = [](SDValue Concat) { return Concat.getOpcode() == ISD::CONCAT_VECTORS && all_of(drop_begin(Concat->ops()), [](const SDValue &Op) { return Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); }); }; // The following pattern is likely to emerge with vector reduction ops. Moving // the binary operation ahead of the concat may allow using a narrower vector // instruction that has better performance than the wide version of the op: // VBinOp (concat X, undef/constant), (concat Y, undef/constant) --> // concat (VBinOp X, Y), VecC if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) && (LHS.hasOneUse() || RHS.hasOneUse())) { EVT NarrowVT = LHS.getOperand(0).getValueType(); if (NarrowVT == RHS.getOperand(0).getValueType() && TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { unsigned NumOperands = LHS.getNumOperands(); SmallVector ConcatOps; for (unsigned i = 0; i != NumOperands; ++i) { // This constant fold for operands 1 and up. ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i), RHS.getOperand(i))); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); } } if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL)) return V; return SDValue(); } SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2) { assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, cast(N0.getOperand(2))->get()); // If we got a simplified select_cc node back from SimplifySelectCC, then // break it down into a new SETCC node, and a new SELECT node, and then return // the SELECT node, since we were called with a SELECT node. if (SCC.getNode()) { // Check to see if we got a select_cc back (to turn into setcc/select). // Otherwise, just return whatever node we got back, like fabs. if (SCC.getOpcode() == ISD::SELECT_CC) { const SDNodeFlags Flags = N0->getFlags(); SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4), Flags); AddToWorklist(SETCC.getNode()); SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, SCC.getOperand(2), SCC.getOperand(3)); SelectNode->setFlags(Flags); return SelectNode; } return SCC; } return SDValue(); } /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values /// being selected between, see if we can simplify the select. 
Callers of this /// should assume that TheSelect is deleted if this returns true. As such, they /// should return the appropriate thing (e.g. the node) back to the top-level of /// the DAG combiner loop to avoid it being looked at. bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) // The select + setcc is redundant, because fsqrt returns NaN for X < 0. if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) SDValue Sqrt = RHS; ISD::CondCode CC; SDValue CmpLHS; const ConstantFPSDNode *Zero = nullptr; if (TheSelect->getOpcode() == ISD::SELECT_CC) { CC = cast(TheSelect->getOperand(4))->get(); CmpLHS = TheSelect->getOperand(0); Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); } else { // SELECT or VSELECT SDValue Cmp = TheSelect->getOperand(0); if (Cmp.getOpcode() == ISD::SETCC) { CC = cast(Cmp.getOperand(2))->get(); CmpLHS = Cmp.getOperand(0); Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); } } if (Zero && Zero->isZero() && Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || CC == ISD::SETULT || CC == ISD::SETLT)) { // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) CombineTo(TheSelect, Sqrt); return true; } } } // Cannot simplify select with vector condition if (TheSelect->getOperand(0).getValueType().isVector()) return false; // If this is a select from two identical things, try to pull the operation // through the select. if (LHS.getOpcode() != RHS.getOpcode() || !LHS.hasOneUse() || !RHS.hasOneUse()) return false; // If this is a load and the token chain is identical, replace the select // of two loads with a load through a select of the address to load from. // This triggers in things like "select bool X, 10.0, 123.0" after the FP // constants have been dropped into the constant pool. if (LHS.getOpcode() == ISD::LOAD) { LoadSDNode *LLD = cast(LHS); LoadSDNode *RLD = cast(RHS); // Token chains must be identical. if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. // Be conservative for atomics for the moment // TODO: This does appear to be legal for unordered atomics (see D66309) !LLD->isSimple() || !RLD->isSimple() || // FIXME: If either is a pre/post inc/dec load, // we'd need to split out the address adjustment. LLD->isIndexed() || RLD->isIndexed() || // If this is an EXTLOAD, the VT's must match. LLD->getMemoryVT() != RLD->getMemoryVT() || // If this is an EXTLOAD, the kind of extension must match. (LLD->getExtensionType() != RLD->getExtensionType() && // The only exception is if one of the extensions is anyext. LLD->getExtensionType() != ISD::EXTLOAD && RLD->getExtensionType() != ISD::EXTLOAD) || // FIXME: this discards src value information. This is // over-conservative. It would be beneficial to be able to remember // both potential memory locations. Since we are discarding // src value info, don't do the transformation if the memory // locations are not in the default address space. LLD->getPointerInfo().getAddrSpace() != 0 || RLD->getPointerInfo().getAddrSpace() != 0 || // We can't produce a CMOV of a TargetFrameIndex since we won't // generate the address generation required. 
LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex || RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex || !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), LLD->getBasePtr().getValueType())) return false; // The loads must not depend on one another. if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) return false; // Check that the select condition doesn't reach either load. If so, // folding this will induce a cycle into the DAG. If not, this is safe to // xform, so create a select of the addresses. SmallPtrSet Visited; SmallVector Worklist; // Always fail if LLD and RLD are not independent. TheSelect is a // predecessor to all Nodes in question so we need not search past it. Visited.insert(TheSelect); Worklist.push_back(LLD); Worklist.push_back(RLD); if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) || SDNode::hasPredecessorHelper(RLD, Visited, Worklist)) return false; SDValue Addr; if (TheSelect->getOpcode() == ISD::SELECT) { // We cannot do this optimization if any pair of {RLD, LLD} is a // predecessor to {RLD, LLD, CondNode}. As we've already compared the // Loads, we only need to check if CondNode is a successor to one of the // loads. We can further avoid this if there's no use of their chain // value. SDNode *CondNode = TheSelect->getOperand(0).getNode(); Worklist.push_back(CondNode); if ((LLD->hasAnyUseOfValue(1) && SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || (RLD->hasAnyUseOfValue(1) && SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) return false; Addr = DAG.getSelect(SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), LLD->getBasePtr(), RLD->getBasePtr()); } else { // Otherwise SELECT_CC // We cannot do this optimization if any pair of {RLD, LLD} is a // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared // the Loads, we only need to check if CondLHS/CondRHS is a successor to // one of the loads. We can further avoid this if there's no use of their // chain value. SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); Worklist.push_back(CondLHS); Worklist.push_back(CondRHS); if ((LLD->hasAnyUseOfValue(1) && SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || (RLD->hasAnyUseOfValue(1) && SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) return false; Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), TheSelect->getOperand(1), LLD->getBasePtr(), RLD->getBasePtr(), TheSelect->getOperand(4)); } SDValue Load; // It is safe to replace the two loads if they have different alignments, // but the new load must be the minimum (most restrictive) alignment of the // inputs. Align Alignment = std::min(LLD->getAlign(), RLD->getAlign()); MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); if (!RLD->isInvariant()) MMOFlags &= ~MachineMemOperand::MOInvariant; if (!RLD->isDereferenceable()) MMOFlags &= ~MachineMemOperand::MODereferenceable; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { // FIXME: Discards pointer and AA info. Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), LLD->getChain(), Addr, MachinePointerInfo(), Alignment, MMOFlags); } else { // FIXME: Discards pointer and AA info. Load = DAG.getExtLoad( LLD->getExtensionType() == ISD::EXTLOAD ? 
RLD->getExtensionType() : LLD->getExtensionType(), SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); } // Users of the select now use the result of the load. CombineTo(TheSelect, Load); // Users of the old loads now use the new load's chain. We know the // old-load value is dead now. CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); return true; } return false; } /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and /// bitwise 'and'. SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { // If this is a select where the false operand is zero and the compare is a // check of the sign bit, see if we can perform the "gzip trick": // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A EVT XType = N0.getValueType(); EVT AType = N2.getValueType(); if (!isNullConstant(N3) || !XType.bitsGE(AType)) return SDValue(); // If the comparison is testing for a positive value, we have to invert // the sign bit mask, so only do that transform if the target has a bitwise // 'and not' instruction (the invert is free). if (CC == ISD::SETGT && TLI.hasAndNot(N2)) { // (X > -1) ? A : 0 // (X > 0) ? X : 0 <-- This is canonical signed max. if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2))) return SDValue(); } else if (CC == ISD::SETLT) { // (X < 0) ? A : 0 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min. if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2))) return SDValue(); } else { return SDValue(); } // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit // constant. EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); auto *N2C = dyn_cast(N2.getNode()); if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) { SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); AddToWorklist(Shift.getNode()); } if (CC == ISD::SETGT) Shift = DAG.getNOT(DL, Shift, AType); return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } } unsigned ShCt = XType.getSizeInBits() - 1; if (TLI.shouldAvoidTransformToShift(XType, ShCt)) return SDValue(); SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); AddToWorklist(Shift.getNode()); } if (CC == ISD::SETGT) Shift = DAG.getNOT(DL, Shift, AType); return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc. SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); SDLoc DL(N); unsigned BinOpc = N1.getOpcode(); if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc)) return SDValue(); // The use checks are intentionally on SDNode because we may be dealing // with opcodes that produce more than one SDValue. 
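  // e.g. select Cond, (add X, Y), (add Z, Y) --> add (select Cond, X, Z), Y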
// TODO: Do we really need to check N0 (the condition operand of the select)? // But removing that clause could cause an infinite loop... if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse()) return SDValue(); // Binops may include opcodes that return multiple values, so all values // must be created/propagated from the newly created binops below. SDVTList OpVTs = N1->getVTList(); // Fold select(cond, binop(x, y), binop(z, y)) // --> binop(select(cond, x, z), y) if (N1.getOperand(1) == N2.getOperand(1)) { SDValue NewSel = DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1)); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); return NewBinOp; } // Fold select(cond, binop(x, y), binop(x, z)) // --> binop(x, select(cond, y, z)) // Second op VT might be different (e.g. shift amount type) if (N1.getOperand(0) == N2.getOperand(0) && VT == N1.getOperand(1).getValueType() && VT == N2.getOperand(1).getValueType()) { SDValue NewSel = DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); return NewBinOp; } // TODO: Handle isCommutativeBinOp patterns as well? return SDValue(); } // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values. SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); bool IsFabs = N->getOpcode() == ISD::FABS; bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT); if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse()) return SDValue(); SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); // The operand to cast should be integer. if (!IntVT.isInteger() || IntVT.isVector()) return SDValue(); // (fneg (bitconvert x)) -> (bitconvert (xor x sign)) // (fabs (bitconvert x)) -> (bitconvert (and x ~sign)) APInt SignMask; if (N0.getValueType().isVector()) { // For vector, create a sign mask (0x80...) or its inverse (for fabs, // 0x7f...) per element and splat it. SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); if (IsFabs) SignMask = ~SignMask; SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...) SignMask = APInt::getSignMask(IntVT.getSizeInBits()); if (IsFabs) SignMask = ~SignMask; } SDLoc DL(N0); Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int, DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); return DAG.getBitcast(VT, Int); } /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 /// in it. This may be a win when the constant is not otherwise available /// because it replaces two constant pool loads with one. SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType())) return SDValue(); // If we are before legalize types, we want the other legalization to happen // first (for example, to avoid messing with soft float). 
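  // Note that in the constant-pool array built below the false value is
  // element 0 and the true value is element 1, so a true condition selects
  // the non-zero offset.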
auto *TV = dyn_cast(N2); auto *FV = dyn_cast(N3); EVT VT = N2.getValueType(); if (!TV || !FV || !TLI.isTypeLegal(VT)) return SDValue(); // If a constant can be materialized without loads, this does not make sense. if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal || TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) || TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize)) return SDValue(); // If both constants have multiple uses, then we won't need to do an extra // load. The values are likely around in registers for other users. if (!TV->hasOneUse() && !FV->hasOneUse()) return SDValue(); Constant *Elts[] = { const_cast(FV->getConstantFPValue()), const_cast(TV->getConstantFPValue()) }; Type *FPTy = Elts[0]->getType(); const DataLayout &TD = DAG.getDataLayout(); // Create a ConstantArray of the two constants. Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), TD.getPrefTypeAlign(FPTy)); Align Alignment = cast(CPIdx)->getAlign(); // Get offsets to the 0 and 1 elements of the array, so we can select between // them. SDValue Zero = DAG.getIntPtrConstant(0, DL); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); AddToWorklist(Cond.getNode()); SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); AddToWorklist(CstOffset.getNode()); CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorklist(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool( DAG.getMachineFunction()), Alignment); } /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare) { // (x ? y : y) -> y. if (N2 == N3) return N2; EVT CmpOpVT = N0.getValueType(); EVT CmpResVT = getSetCCResultType(CmpOpVT); EVT VT = N2.getValueType(); auto *N1C = dyn_cast(N1.getNode()); auto *N2C = dyn_cast(N2.getNode()); auto *N3C = dyn_cast(N3.getNode()); // Determine if the condition we're dealing with is constant. if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) { AddToWorklist(SCC.getNode()); if (auto *SCCC = dyn_cast(SCC)) { // fold select_cc true, x, y -> x // fold select_cc false, x, y -> y return !(SCCC->isZero()) ? N2 : N3; } } if (SDValue V = convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC)) return V; if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) return V; // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) // where y is has a single bit set. // A plaintext description would be, we can turn the SELECT_CC into an AND // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. 
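      // e.g. for i32 X with AndMask == 0x4: shl by 29 moves bit 2 into the
      // sign bit and sra by 31 then yields all-ones when the bit was set, so
      // the result is (and (sra (shl X, 29), 31), A).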
const APInt &AndMask = ConstAndRHS->getAPIntValue(); unsigned ShCt = AndMask.getBitWidth() - 1; if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { SDValue ShlAmt = DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is // either all-ones, or zero. SDValue ShrAmt = DAG.getConstant(ShCt, SDLoc(Shl), getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); return DAG.getNode(ISD::AND, DL, VT, Shr, N3); } } } // fold select C, 16, 0 -> shl C, 4 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2(); bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2(); if ((Fold || Swap) && TLI.getBooleanContents(CmpOpVT) == TargetLowering::ZeroOrOneBooleanContent && (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) { if (Swap) { CC = ISD::getSetCCInverse(CC, CmpOpVT); std::swap(N2C, N3C); } // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. if (NotExtCompare && N2C->isOne()) return SDValue(); SDValue Temp, SCC; // zext (setcc n0, n1) if (LegalTypes) { SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC); if (VT.bitsLT(SCC.getValueType())) Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT); else Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); } else { SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); } AddToWorklist(SCC.getNode()); AddToWorklist(Temp.getNode()); if (N2C->isOne()) return Temp; unsigned ShCt = N2C->getAPIntValue().logBase2(); if (TLI.shouldAvoidTransformToShift(VT, ShCt)) return SDValue(); // shl setcc result by log2 n2c return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, DAG.getConstant(ShCt, SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue ValueOnZero = N2; SDValue Count = N3; // If the condition is NE instead of E, swap the operands. if (CC == ISD::SETNE) std::swap(ValueOnZero, Count); // Check if the value on zero is a constant equal to the bits in the type. if (auto *ValueOnZeroC = dyn_cast(ValueOnZero)) { if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) { // If the other operand is cttz/cttz_zero_undef of N0, and cttz is // legal, combine to just cttz. if ((Count.getOpcode() == ISD::CTTZ || Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && N0 == Count.getOperand(0) && (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT))) return DAG.getNode(ISD::CTTZ, DL, VT, N0); // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is // legal, combine to just ctlz. 
if ((Count.getOpcode() == ISD::CTLZ || Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && N0 == Count.getOperand(0) && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT))) return DAG.getNode(ISD::CTLZ, DL, VT, N0); } } } // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C if (!NotExtCompare && N1C && N2C && N3C && N2C->getAPIntValue() == ~N3C->getAPIntValue() && ((N1C->isAllOnes() && CC == ISD::SETGT) || (N1C->isZero() && CC == ISD::SETLT)) && !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) { SDValue ASR = DAG.getNode( ISD::SRA, DL, CmpOpVT, N0, DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT)); return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT), DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT)); } if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG)) return S; if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG)) return S; return SDValue(); } /// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } /// Given an ISD::SDIV node expressing a divide by constant, return /// a DAG expression to select that will generate the same value by multiplying /// by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. if (DAG.getMachineFunction().getFunction().hasMinSize()) return SDValue(); SmallVector Built; if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) { for (SDNode *N : Built) AddToWorklist(N); return S; } return SDValue(); } /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a /// DAG expression that will generate the same value by right shifting. SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); // Avoid division by zero. if (C->isZero()) return SDValue(); SmallVector Built; if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) { for (SDNode *N : Built) AddToWorklist(N); return S; } return SDValue(); } /// Given an ISD::UDIV node expressing a divide by constant, return a DAG /// expression that will generate the same value by multiplying by a magic /// number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildUDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. if (DAG.getMachineFunction().getFunction().hasMinSize()) return SDValue(); SmallVector Built; if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) { for (SDNode *N : Built) AddToWorklist(N); return S; } return SDValue(); } /// Given an ISD::SREM node expressing a remainder by constant power of 2, /// return a DAG expression that will generate the same value. SDValue DAGCombiner::BuildSREMPow2(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); // Avoid division by zero. 
if (C->isZero()) return SDValue(); SmallVector Built; if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) { for (SDNode *N : Built) AddToWorklist(N); return S; } return SDValue(); } /// Determines the LogBase2 value for a non-null input value using the /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { EVT VT = V.getValueType(); SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); return LogBase2; } /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal, we need to find the zero of the function: /// F(X) = 1/X - A [which has a zero at X = 1/A] /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] /// For the last iteration, put numerator N into it to gain more precision: /// Result = N X_i + X_i (N - N A X_i) SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags) { if (LegalDAG) return SDValue(); // TODO: Handle extended types? EVT VT = Op.getValueType(); if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. MachineFunction &MF = DAG.getMachineFunction(); int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); if (Enabled == TLI.ReciprocalEstimate::Disabled) return SDValue(); // Estimates may be explicitly enabled for this type with a custom number of // refinement steps. int Iterations = TLI.getDivRefinementSteps(VT, MF); if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { AddToWorklist(Est.getNode()); SDLoc DL(Op); if (Iterations) { SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); // Newton iterations: Est = Est + Est (N - Arg * Est) // If this is the last iteration, also multiply by the numerator. for (int i = 0; i < Iterations; ++i) { SDValue MulEst = Est; if (i == Iterations - 1) { MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags); AddToWorklist(MulEst.getNode()); } SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FSUB, DL, VT, (i == Iterations - 1 ? N : FPOne), NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags); AddToWorklist(Est.getNode()); } } else { // If no iterations are available, multiply with N. Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags); AddToWorklist(Est.getNode()); } return Est; } return SDValue(); } /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal sqrt, we need to find the zero of the function: /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. 
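// Worked example (illustrative note, not part of the original source): for
// A = 4.0 the iteration's fixed point is E = 0.5, since
//   E * (1.5 - (A/2) * E * E) = 0.5 * (1.5 - 2.0 * 0.25) = 0.5 = 1/sqrt(4.0);
// a rougher initial estimate converges quadratically toward this value.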
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); } // If non-reciprocal square root is requested, multiply the result by Arg. if (!Reciprocal) Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); return Est; } /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal sqrt, we need to find the zero of the function: /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); // This routine must enter the loop below to work correctly // when (Reciprocal == false). assert(Iterations > 0); // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) // (notice a common subexpression) SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); } else { // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); } return Est; } /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { if (LegalDAG) return SDValue(); // TODO: Handle extended types? EVT VT = Op.getValueType(); if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. MachineFunction &MF = DAG.getMachineFunction(); int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); if (Enabled == TLI.ReciprocalEstimate::Disabled) return SDValue(); // Estimates may be explicitly enabled for this type with a custom number of // refinement steps. int Iterations = TLI.getSqrtRefinementSteps(VT, MF); bool UseOneConstNR = false; if (SDValue Est = TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, Reciprocal)) { AddToWorklist(Est.getNode()); if (Iterations) Est = UseOneConstNR ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); if (!Reciprocal) { SDLoc DL(Op); // Try the target specific test first. SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT)); // The estimate is now completely wrong if the input was exactly 0.0 or // possibly a denormal. 
Force the answer to 0.0 or value provided by // target for those cases. Est = DAG.getNode( Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est); } return Est; } return SDValue(); } SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, true); } SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, false); } /// Return true if there is any possibility that the two addresses overlap. bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; bool IsAtomic; SDValue BasePtr; int64_t Offset; Optional NumBytes; MachineMemOperand *MMO; }; auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics { if (const auto *LSN = dyn_cast(N)) { int64_t Offset = 0; if (auto *C = dyn_cast(LSN->getOffset())) Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue() : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; uint64_t Size = MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize()); return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), Offset /*base offset*/, Optional(Size), LSN->getMemOperand()}; } if (const auto *LN = cast(N)) return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, (LN->hasOffset()) ? Optional(LN->getSize()) : Optional(), (MachineMemOperand *)nullptr}; // Default. return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), (int64_t)0 /*offset*/, Optional() /*size*/, (MachineMemOperand *)nullptr}; }; MemUseCharacteristics MUC0 = getCharacteristics(Op0), MUC1 = getCharacteristics(Op1); // If they are to the same address, then they must be aliases. if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr && MUC0.Offset == MUC1.Offset) return true; // If they are both volatile then they cannot be reordered. if (MUC0.IsVolatile && MUC1.IsVolatile) return true; // Be conservative about atomics for the moment // TODO: This is way overconservative for unordered atomics (see D66309) if (MUC0.IsAtomic && MUC1.IsAtomic) return true; if (MUC0.MMO && MUC1.MMO) { if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) return false; } // Try to prove that there is aliasing, or that there is no aliasing. Either // way, we can return now. If nothing can be proved, proceed with more tests. bool IsAlias; if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes, DAG, IsAlias)) return IsAlias; // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if // either are not known. if (!MUC0.MMO || !MUC1.MMO) return true; // If one operation reads from invariant memory, and the other may store, they // cannot alias. These should really be checking the equivalent of mayWrite, // but it only matters for memory nodes other than load /store. if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) return false; // If we know required SrcValue1 and SrcValue2 have relatively large // alignment compared to the size and offset of the access, we may be able // to prove they do not alias. This check is conservative for now to catch // cases created by splitting vector types, it only works when the offsets are // multiples of the size of the data. 
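// Illustrative example (not part of the original source): two 8-byte accesses
// whose base pointers share 16-byte alignment, at offsets 0 and 8, occupy
// disjoint slots of the same aligned window and therefore cannot overlap; the
// shortcut below only fires when both offsets are multiples of the access size.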
int64_t SrcValOffset0 = MUC0.MMO->getOffset(); int64_t SrcValOffset1 = MUC1.MMO->getOffset(); Align OrigAlignment0 = MUC0.MMO->getBaseAlign(); Align OrigAlignment1 = MUC1.MMO->getBaseAlign(); auto &Size0 = MUC0.NumBytes; auto &Size1 = MUC1.NumBytes; if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && Size0.has_value() && Size1.has_value() && *Size0 == *Size1 && OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); // There is no overlap between these relatively aligned accesses of // similar size. Return no alias. if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0) return false; } bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 && Size1) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset; if (AA->isNoAlias( MemoryLocation(MUC0.MMO->getValue(), Overlap0, UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), MemoryLocation(MUC1.MMO->getValue(), Overlap1, UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()))) return false; } // Otherwise we have to assume they alias. return true; } /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl &Aliases) { SmallVector Chains; // List of chains to visit. SmallPtrSet Visited; // Visited node set. // Get alias information for node. // TODO: relax aliasing for unordered atomics (see D66309) const bool IsLoad = isa(N) && cast(N)->isSimple(); // Starting off. Chains.push_back(OriginalChain); unsigned Depth = 0; // Attempt to improve chain by a single step auto ImproveChain = [&](SDValue &C) -> bool { switch (C.getOpcode()) { case ISD::EntryToken: // No need to mark EntryToken. C = SDValue(); return true; case ISD::LOAD: case ISD::STORE: { // Get alias information for C. // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa(C.getNode()) && cast(C.getNode())->isSimple(); if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); return true; } // Alias, so stop here. return false; } case ISD::CopyFromReg: // Always forward past past CopyFromReg. C = C.getOperand(0); return true; case ISD::LIFETIME_START: case ISD::LIFETIME_END: { // We can forward past any lifetime start/end that can be proven not to // alias the memory access. if (!mayAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); return true; } return false; } default: return false; } }; // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.pop_back_val(); // Don't bother if we've seen Chain before. if (!Visited.insert(Chain.getNode()).second) continue; // For TokenFactor nodes, look at each operand and only continue up the // chain until we reach the depth limit. 
// // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. if (Depth > TLI.getGatherAllAliasesMaxDepth()) { Aliases.clear(); Aliases.push_back(OriginalChain); return; } if (Chain.getOpcode() == ISD::TokenFactor) { // We have to check each of the operands of the token factor for "small" // token factors, so we queue them up. Adding the operands to the queue // (stack) in reverse order maintains the original order and increases the // likelihood that getNode will find a matching token factor (CSE.) if (Chain.getNumOperands() > 16) { Aliases.push_back(Chain); continue; } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); ++Depth; continue; } // Everything else if (ImproveChain(Chain)) { // Updated Chain Found, Consider new chain if one exists. if (Chain.getNode()) Chains.push_back(Chain); ++Depth; continue; } // No Improved Chain Possible, treat as Alias. Aliases.push_back(Chain); } } /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain /// (aliasing node.) SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { if (OptLevel == CodeGenOpt::None) return OldChain; // Ops for replacing token factor. SmallVector Aliases; // Accumulate all the aliases to this node. GatherAllAliases(N, OldChain, Aliases); // If no operands then chain to entry token. if (Aliases.size() == 0) return DAG.getEntryNode(); // If a single operand then chain to it. We don't need to revisit it. if (Aliases.size() == 1) return Aliases[0]; // Construct a custom tailored token factor. return DAG.getTokenFactor(SDLoc(N), Aliases); } namespace { // TODO: Replace with with std::monostate when we move to C++17. struct UnitT { } Unit; bool operator==(const UnitT &, const UnitT &) { return true; } bool operator!=(const UnitT &, const UnitT &) { return false; } } // namespace // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. This might seem // redundant, as this function gets called when visiting every store // node, so why not let the work be done on each store as it's visited? // // I believe this is mainly important because mergeConsecutiveStores // is unable to deal with merging stores of different sizes, so unless // we improve the chains of all the potential candidates up-front // before running mergeConsecutiveStores, it might only see some of // the nodes that will eventually be candidates, and then not be able // to go from a partially-merged state to the desired final // fully-merged state. bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { SmallVector ChainedStores; StoreSDNode *STChain = St; // Intervals records which offsets from BaseIndex have been covered. In // the common case, every store writes to the immediately previous address // space and thus merged with the previous interval at insertion time. using IMap = llvm::IntervalMap>; IMap::Allocator A; IMap Intervals(A); // This holds the base pointer, index, and the offset in bytes from the base // pointer. const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) return false; // Do not handle stores to undef base pointers. 
if (BasePtr.getBase().isUndef()) return false; // Do not handle stores to opaque types if (St->getMemoryVT().isZeroSized()) return false; // BaseIndexOffset assumes that offsets are fixed-size, which // is not valid for scalable vectors where the offsets are // scaled by `vscale`, so bail out early. if (St->getMemoryVT().isScalableVector()) return false; // Add ST's interval. Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); while (StoreSDNode *Chain = dyn_cast(STChain->getChain())) { if (Chain->getMemoryVT().isScalableVector()) return false; // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; // TODO: Relax for unordered atomics (see D66309) if (!Chain->isSimple() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG); // Check that the base pointer is the same as the original one. int64_t Offset; if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset)) break; int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8; // Make sure we don't overlap with other intervals by checking the ones to // the left or right before inserting. auto I = Intervals.find(Offset); // If there's a next interval, we should end before it. if (I != Intervals.end() && I.start() < (Offset + Length)) break; // If there's a previous interval, we should start after it. if (I != Intervals.begin() && (--I).stop() <= Offset) break; Intervals.insert(Offset, Offset + Length, Unit); ChainedStores.push_back(Chain); STChain = Chain; } // If we didn't find a chained store, exit. if (ChainedStores.size() == 0) return false; // Improve all chained stores (St and ChainedStores members) starting from // where the store chain ended and return single TokenFactor. SDValue NewChain = STChain->getChain(); SmallVector TFOps; for (unsigned I = ChainedStores.size(); I;) { StoreSDNode *S = ChainedStores[--I]; SDValue BetterChain = FindBetterChain(S, NewChain); S = cast(DAG.UpdateNodeOperands( S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3))); TFOps.push_back(SDValue(S, 0)); ChainedStores[I] = S; } // Improve St's chain. Use a new node to avoid creating a loop from CombineTo. SDValue BetterChain = FindBetterChain(St, NewChain); SDValue NewST; if (St->isTruncatingStore()) NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(), St->getBasePtr(), St->getMemoryVT(), St->getMemOperand()); else NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(), St->getBasePtr(), St->getMemOperand()); TFOps.push_back(NewST); // If we improved every element of TFOps, then we've lost the dependence on // NewChain to successors of St and we need to add it back to TFOps. Do so at // the beginning to keep relative order consistent with FindBetterChains. auto hasImprovedChain = [&](SDValue ST) -> bool { return ST->getOperand(0) != NewChain; }; bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain); if (AddNewChain) TFOps.insert(TFOps.begin(), NewChain); SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); CombineTo(St, TF); // Add TF and its operands to the worklist. AddToWorklist(TF.getNode()); for (const SDValue &Op : TF->ops()) AddToWorklist(Op.getNode()); AddToWorklist(STChain); return true; } bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { if (OptLevel == CodeGenOpt::None) return false; const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); // We must have a base and an offset. 
if (!BasePtr.getBase().getNode()) return false; // Do not handle stores to undef base pointers. if (BasePtr.getBase().isUndef()) return false; // Directly improve a chain of disjoint stores starting at St. if (parallelizeChainedStores(St)) return true; // Improve St's Chain.. SDValue BetterChain = FindBetterChain(St, St->getChain()); if (St->getChain() != BetterChain) { replaceStoreChain(St, BetterChain); return true; } return false; } /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, CodeGenOpt::Level OptLevel) { /// This is the main entry point to this class. DAGCombiner(*this, AA, OptLevel).Run(Level); } diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp index 91a51485026e..d96c0c85d5bd 100644 --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -1,504 +1,758 @@ //===-- lib/DebugInfo/Symbolize/MarkupFilter.cpp -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file defines the implementation of a filter that replaces symbolizer /// markup with human-readable expressions. /// /// See https://llvm.org/docs/SymbolizerMarkupFormat.html /// //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/Symbolize/MarkupFilter.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/Symbolize/Markup.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Debuginfod/Debuginfod.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::symbolize; -MarkupFilter::MarkupFilter(raw_ostream &OS, Optional ColorsEnabled) - : OS(OS), ColorsEnabled(ColorsEnabled.value_or( - WithColor::defaultAutoDetectFunction()(OS))) {} +MarkupFilter::MarkupFilter(raw_ostream &OS, LLVMSymbolizer &Symbolizer, + Optional ColorsEnabled) + : OS(OS), Symbolizer(Symbolizer), + ColorsEnabled( + ColorsEnabled.value_or(WithColor::defaultAutoDetectFunction()(OS))) {} void MarkupFilter::filter(StringRef Line) { this->Line = Line; resetColor(); Parser.parseLine(Line); SmallVector DeferredNodes; // See if the line is a contextual (i.e. contains a contextual element). // In this case, anything after the contextual element is elided, or the whole // line may be elided. while (Optional Node = Parser.nextNode()) { // If this was a contextual line, then summarily stop processing. if (tryContextualElement(*Node, DeferredNodes)) return; // This node may yet be part of an elided contextual line. DeferredNodes.push_back(*Node); } // This was not a contextual line, so nothing in it should be elided. 
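// Note (illustrative, not part of the original source): DeferredNodes now
// holds every node parsed from this line, so emitting them in order below
// reproduces the non-contextual line as written.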
endAnyModuleInfoLine(); for (const MarkupNode &Node : DeferredNodes) filterNode(Node); } void MarkupFilter::finish() { Parser.flush(); while (Optional Node = Parser.nextNode()) filterNode(*Node); endAnyModuleInfoLine(); resetColor(); Modules.clear(); MMaps.clear(); } // See if the given node is a contextual element and handle it if so. This may // either output or defer the element; in the former case, it will first emit // any DeferredNodes. // // Returns true if the given element was a contextual element. In this case, // DeferredNodes should be considered handled and should not be emitted. The // rest of the containing line must also be ignored in case the element was // deferred to a following line. bool MarkupFilter::tryContextualElement( const MarkupNode &Node, const SmallVector &DeferredNodes) { if (tryMMap(Node, DeferredNodes)) return true; if (tryReset(Node, DeferredNodes)) return true; return tryModule(Node, DeferredNodes); } bool MarkupFilter::tryMMap(const MarkupNode &Node, const SmallVector &DeferredNodes) { if (Node.Tag != "mmap") return false; Optional ParsedMMap = parseMMap(Node); if (!ParsedMMap) return true; - if (const MMap *M = overlappingMMap(*ParsedMMap)) { + if (const MMap *M = getOverlappingMMap(*ParsedMMap)) { WithColor::error(errs()) - << formatv("overlapping mmap: #{0:x} [{1:x},{2:x})\n", M->Mod->ID, - M->Addr, M->Addr + M->Size); + << formatv("overlapping mmap: #{0:x} [{1:x}-{2:x}]\n", M->Mod->ID, + M->Addr, M->Addr + M->Size - 1); reportLocation(Node.Fields[0].begin()); return true; } auto Res = MMaps.emplace(ParsedMMap->Addr, std::move(*ParsedMMap)); assert(Res.second && "Overlap check should ensure emplace succeeds."); MMap &MMap = Res.first->second; if (!MIL || MIL->Mod != MMap.Mod) { endAnyModuleInfoLine(); for (const MarkupNode &Node : DeferredNodes) filterNode(Node); beginModuleInfoLine(MMap.Mod); OS << "; adds"; } MIL->MMaps.push_back(&MMap); return true; } bool MarkupFilter::tryReset(const MarkupNode &Node, const SmallVector &DeferredNodes) { if (Node.Tag != "reset") return false; if (!checkNumFields(Node, 0)) return true; if (!Modules.empty() || !MMaps.empty()) { endAnyModuleInfoLine(); for (const MarkupNode &Node : DeferredNodes) filterNode(Node); highlight(); OS << "[[[reset]]]" << lineEnding(); restoreColor(); Modules.clear(); MMaps.clear(); } return true; } bool MarkupFilter::tryModule(const MarkupNode &Node, const SmallVector &DeferredNodes) { if (Node.Tag != "module") return false; Optional ParsedModule = parseModule(Node); if (!ParsedModule) return true; auto Res = Modules.try_emplace( ParsedModule->ID, std::make_unique(std::move(*ParsedModule))); if (!Res.second) { WithColor::error(errs()) << "duplicate module ID\n"; reportLocation(Node.Fields[0].begin()); return true; } Module &Module = *Res.first->second; endAnyModuleInfoLine(); for (const MarkupNode &Node : DeferredNodes) filterNode(Node); beginModuleInfoLine(&Module); OS << "; BuildID="; - highlightValue(); - OS << toHex(Module.BuildID, /*LowerCase=*/true); - highlight(); + printValue(toHex(Module.BuildID, /*LowerCase=*/true)); return true; } void MarkupFilter::beginModuleInfoLine(const Module *M) { highlight(); OS << "[[[ELF module"; - highlightValue(); - OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name); - highlight(); + printValue(formatv(" #{0:x} ", M->ID)); + OS << '"'; + printValue(M->Name); + OS << '"'; MIL = ModuleInfoLine{M}; } void MarkupFilter::endAnyModuleInfoLine() { if (!MIL) return; llvm::stable_sort(MIL->MMaps, [](const MMap *A, const MMap *B) { return A->Addr < B->Addr; }); 
for (const MMap *M : MIL->MMaps) { - OS << (M == MIL->MMaps.front() ? ' ' : '-'); - highlightValue(); - OS << formatv("{0:x}", M->Addr); - highlight(); - OS << '('; - highlightValue(); - OS << M->Mode; - highlight(); + OS << (M == MIL->MMaps.front() ? ' ' : ','); + OS << '['; + printValue(formatv("{0:x}", M->Addr)); + OS << '-'; + printValue(formatv("{0:x}", M->Addr + M->Size - 1)); + OS << "]("; + printValue(M->Mode); OS << ')'; } OS << "]]]" << lineEnding(); restoreColor(); MIL.reset(); } // Handle a node that is known not to be a contextual element. void MarkupFilter::filterNode(const MarkupNode &Node) { if (!checkTag(Node)) return; if (tryPresentation(Node)) return; if (trySGR(Node)) return; OS << Node.Text; } bool MarkupFilter::tryPresentation(const MarkupNode &Node) { - return trySymbol(Node); + if (trySymbol(Node)) + return true; + if (tryPC(Node)) + return true; + if (tryBackTrace(Node)) + return true; + return tryData(Node); } bool MarkupFilter::trySymbol(const MarkupNode &Node) { if (Node.Tag != "symbol") return false; if (!checkNumFields(Node, 1)) return true; highlight(); OS << llvm::demangle(Node.Fields.front().str()); restoreColor(); return true; } +bool MarkupFilter::tryPC(const MarkupNode &Node) { + if (Node.Tag != "pc") + return false; + if (!checkNumFieldsAtLeast(Node, 1)) + return true; + if (!checkNumFieldsAtMost(Node, 2)) + return true; + + Optional Addr = parseAddr(Node.Fields[0]); + if (!Addr) + return true; + + // PC addresses that aren't part of a backtrace are assumed to be precise code + // locations. + PCType Type = PCType::PreciseCode; + if (Node.Fields.size() == 2) { + Optional ParsedType = parsePCType(Node.Fields[1]); + if (!ParsedType) + return true; + Type = *ParsedType; + } + *Addr = adjustAddr(*Addr, Type); + + const MMap *MMap = getContainingMMap(*Addr); + if (!MMap) { + WithColor::error() << "no mmap covers address\n"; + reportLocation(Node.Fields[0].begin()); + printRawElement(Node); + return true; + } + + Expected LI = Symbolizer.symbolizeCode( + MMap->Mod->BuildID, {MMap->getModuleRelativeAddr(*Addr)}); + if (!LI) { + WithColor::defaultErrorHandler(LI.takeError()); + printRawElement(Node); + return true; + } + if (!*LI) { + printRawElement(Node); + return true; + } + + highlight(); + printValue(LI->FunctionName); + OS << '['; + printValue(LI->FileName); + OS << ':'; + printValue(Twine(LI->Line)); + OS << ']'; + restoreColor(); + return true; +} + +bool MarkupFilter::tryBackTrace(const MarkupNode &Node) { + if (Node.Tag != "bt") + return false; + if (!checkNumFieldsAtLeast(Node, 2)) + return true; + if (!checkNumFieldsAtMost(Node, 3)) + return true; + + Optional FrameNumber = parseFrameNumber(Node.Fields[0]); + if (!FrameNumber) + return true; + + Optional Addr = parseAddr(Node.Fields[1]); + if (!Addr) + return true; + + // Backtrace addresses are assumed to be return addresses by default. 
+ PCType Type = PCType::ReturnAddress; + if (Node.Fields.size() == 3) { + Optional ParsedType = parsePCType(Node.Fields[2]); + if (!ParsedType) + return true; + Type = *ParsedType; + } + *Addr = adjustAddr(*Addr, Type); + + const MMap *MMap = getContainingMMap(*Addr); + if (!MMap) { + WithColor::error() << "no mmap covers address\n"; + reportLocation(Node.Fields[0].begin()); + printRawElement(Node); + return true; + } + uint64_t MRA = MMap->getModuleRelativeAddr(*Addr); + + Expected II = + Symbolizer.symbolizeInlinedCode(MMap->Mod->BuildID, {MRA}); + if (!II) { + WithColor::defaultErrorHandler(II.takeError()); + printRawElement(Node); + return true; + } + + highlight(); + for (unsigned I = 0, E = II->getNumberOfFrames(); I != E; ++I) { + auto Header = formatv("{0, +6}", formatv("#{0}", FrameNumber)).sstr<16>(); + // Don't highlight the # sign as a value. + size_t NumberIdx = Header.find("#") + 1; + OS << Header.substr(0, NumberIdx); + printValue(Header.substr(NumberIdx)); + if (I == E - 1) { + OS << " "; + } else { + OS << '.'; + printValue(formatv("{0, -2}", I + 1)); + } + printValue(formatv(" {0:x16} ", *Addr)); + + DILineInfo LI = II->getFrame(I); + if (LI) { + printValue(LI.FunctionName); + OS << ' '; + printValue(LI.FileName); + OS << ':'; + printValue(Twine(LI.Line)); + OS << ':'; + printValue(Twine(LI.Column)); + OS << ' '; + } + OS << '('; + printValue(MMap->Mod->Name); + OS << "+"; + printValue(formatv("{0:x}", MRA)); + OS << ')'; + if (I != E - 1) + OS << lineEnding(); + } + restoreColor(); + return true; +} + +bool MarkupFilter::tryData(const MarkupNode &Node) { + if (Node.Tag != "data") + return false; + if (!checkNumFields(Node, 1)) + return true; + Optional Addr = parseAddr(Node.Fields[0]); + if (!Addr) + return true; + + const MMap *MMap = getContainingMMap(*Addr); + if (!MMap) { + WithColor::error() << "no mmap covers address\n"; + reportLocation(Node.Fields[0].begin()); + printRawElement(Node); + return true; + } + + Expected Symbol = Symbolizer.symbolizeData( + MMap->Mod->BuildID, {MMap->getModuleRelativeAddr(*Addr)}); + if (!Symbol) { + WithColor::defaultErrorHandler(Symbol.takeError()); + printRawElement(Node); + return true; + } + + highlight(); + OS << Symbol->Name; + restoreColor(); + return true; +} + bool MarkupFilter::trySGR(const MarkupNode &Node) { if (Node.Text == "\033[0m") { resetColor(); return true; } if (Node.Text == "\033[1m") { Bold = true; if (ColorsEnabled) OS.changeColor(raw_ostream::Colors::SAVEDCOLOR, Bold); return true; } auto SGRColor = StringSwitch>(Node.Text) .Case("\033[30m", raw_ostream::Colors::BLACK) .Case("\033[31m", raw_ostream::Colors::RED) .Case("\033[32m", raw_ostream::Colors::GREEN) .Case("\033[33m", raw_ostream::Colors::YELLOW) .Case("\033[34m", raw_ostream::Colors::BLUE) .Case("\033[35m", raw_ostream::Colors::MAGENTA) .Case("\033[36m", raw_ostream::Colors::CYAN) .Case("\033[37m", raw_ostream::Colors::WHITE) .Default(llvm::None); if (SGRColor) { Color = *SGRColor; if (ColorsEnabled) OS.changeColor(*Color); return true; } return false; } // Begin highlighting text by picking a different color than the current color // state. void MarkupFilter::highlight() { if (!ColorsEnabled) return; OS.changeColor(Color == raw_ostream::Colors::BLUE ? raw_ostream::Colors::CYAN : raw_ostream::Colors::BLUE, Bold); } // Begin highlighting a field within a highlighted markup string. 
void MarkupFilter::highlightValue() { if (!ColorsEnabled) return; OS.changeColor(raw_ostream::Colors::GREEN, Bold); } // Set the output stream's color to the current color and bold state of the SGR // abstract machine. void MarkupFilter::restoreColor() { if (!ColorsEnabled) return; if (Color) { OS.changeColor(*Color, Bold); } else { OS.resetColor(); if (Bold) OS.changeColor(raw_ostream::Colors::SAVEDCOLOR, Bold); } } // Set the SGR and output stream's color and bold states back to the default. void MarkupFilter::resetColor() { if (!Color && !Bold) return; Color.reset(); Bold = false; if (ColorsEnabled) OS.resetColor(); } +void MarkupFilter::printRawElement(const MarkupNode &Element) { + highlight(); + OS << "[[["; + printValue(Element.Tag); + for (StringRef Field : Element.Fields) { + OS << ':'; + printValue(Field); + } + OS << "]]]"; + restoreColor(); +} + +void MarkupFilter::printValue(Twine Value) { + highlightValue(); + OS << Value; + highlight(); +} + // This macro helps reduce the amount of indirection done through Optional // below, since the usual case upon returning a None Optional is to return None. #define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR) \ auto NAME##Opt = (EXPR); \ if (!NAME##Opt) \ return None; \ TYPE NAME = std::move(*NAME##Opt) Optional MarkupFilter::parseModule(const MarkupNode &Element) const { if (!checkNumFieldsAtLeast(Element, 3)) return None; ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[0])); StringRef Name = Element.Fields[1]; StringRef Type = Element.Fields[2]; if (Type != "elf") { WithColor::error() << "unknown module type\n"; reportLocation(Type.begin()); return None; } if (!checkNumFields(Element, 4)) return None; ASSIGN_OR_RETURN_NONE(SmallVector, BuildID, parseBuildID(Element.Fields[3])); return Module{ID, Name.str(), std::move(BuildID)}; } Optional MarkupFilter::parseMMap(const MarkupNode &Element) const { if (!checkNumFieldsAtLeast(Element, 3)) return None; ASSIGN_OR_RETURN_NONE(uint64_t, Addr, parseAddr(Element.Fields[0])); ASSIGN_OR_RETURN_NONE(uint64_t, Size, parseSize(Element.Fields[1])); StringRef Type = Element.Fields[2]; if (Type != "load") { WithColor::error() << "unknown mmap type\n"; reportLocation(Type.begin()); return None; } if (!checkNumFields(Element, 6)) return None; ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[3])); ASSIGN_OR_RETURN_NONE(std::string, Mode, parseMode(Element.Fields[4])); auto It = Modules.find(ID); if (It == Modules.end()) { WithColor::error() << "unknown module ID\n"; reportLocation(Element.Fields[3].begin()); return None; } ASSIGN_OR_RETURN_NONE(uint64_t, ModuleRelativeAddr, parseAddr(Element.Fields[5])); return MMap{Addr, Size, It->second.get(), std::move(Mode), ModuleRelativeAddr}; } // Parse an address (%p in the spec). Optional MarkupFilter::parseAddr(StringRef Str) const { if (Str.empty()) { reportTypeError(Str, "address"); return None; } if (all_of(Str, [](char C) { return C == '0'; })) return 0; if (!Str.startswith("0x")) { reportTypeError(Str, "address"); return None; } uint64_t Addr; if (Str.drop_front(2).getAsInteger(16, Addr)) { reportTypeError(Str, "address"); return None; } return Addr; } // Parse a module ID (%i in the spec). Optional MarkupFilter::parseModuleID(StringRef Str) const { uint64_t ID; if (Str.getAsInteger(0, ID)) { reportTypeError(Str, "module ID"); return None; } return ID; } // Parse a size (%i in the spec). 
Optional MarkupFilter::parseSize(StringRef Str) const { uint64_t ID; if (Str.getAsInteger(0, ID)) { reportTypeError(Str, "size"); return None; } return ID; } +// Parse a frame number (%i in the spec). +Optional MarkupFilter::parseFrameNumber(StringRef Str) const { + uint64_t ID; + if (Str.getAsInteger(10, ID)) { + reportTypeError(Str, "frame number"); + return None; + } + return ID; +} + // Parse a build ID (%x in the spec). Optional> MarkupFilter::parseBuildID(StringRef Str) const { std::string Bytes; if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) { reportTypeError(Str, "build ID"); return None; } ArrayRef BuildID(reinterpret_cast(Bytes.data()), Bytes.size()); return SmallVector(BuildID.begin(), BuildID.end()); } // Parses the mode string for an mmap element. Optional MarkupFilter::parseMode(StringRef Str) const { if (Str.empty()) { reportTypeError(Str, "mode"); return None; } // Pop off each of r/R, w/W, and x/X from the front, in that order. StringRef Remainder = Str; if (!Remainder.empty() && tolower(Remainder.front()) == 'r') Remainder = Remainder.drop_front(); if (!Remainder.empty() && tolower(Remainder.front()) == 'w') Remainder = Remainder.drop_front(); if (!Remainder.empty() && tolower(Remainder.front()) == 'x') Remainder = Remainder.drop_front(); // If anything remains, then the string wasn't a mode. if (!Remainder.empty()) { reportTypeError(Str, "mode"); return None; } // Normalize the mode. return Str.lower(); } +Optional MarkupFilter::parsePCType(StringRef Str) const { + Optional Type = + StringSwitch>(Str) + .Case("ra", MarkupFilter::PCType::ReturnAddress) + .Case("pc", MarkupFilter::PCType::PreciseCode) + .Default(None); + if (!Type) + reportTypeError(Str, "PC type"); + return Type; +} + bool MarkupFilter::checkTag(const MarkupNode &Node) const { if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) { WithColor::error(errs()) << "tags must be all lowercase characters\n"; reportLocation(Node.Tag.begin()); return false; } return true; } bool MarkupFilter::checkNumFields(const MarkupNode &Element, size_t Size) const { if (Element.Fields.size() != Size) { - WithColor::error(errs()) << "expected " << Size << " fields; found " + WithColor::error(errs()) << "expected " << Size << " field(s); found " << Element.Fields.size() << "\n"; reportLocation(Element.Tag.end()); return false; } return true; } bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const { if (Element.Fields.size() < Size) { WithColor::error(errs()) - << "expected at least " << Size << " fields; found " + << "expected at least " << Size << " field(s); found " + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); + return false; + } + return true; +} + +bool MarkupFilter::checkNumFieldsAtMost(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() > Size) { + WithColor::error(errs()) + << "expected at most " << Size << " field(s); found " << Element.Fields.size() << "\n"; reportLocation(Element.Tag.end()); return false; } return true; } void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const { WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str << "'\n"; reportLocation(Str.begin()); } // Prints two lines that point out the given location in the current Line using // a caret. The iterator must be within the bounds of the most recent line // passed to beginLine(). 
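// For example (illustrative, not part of the original source): if Line is
// "[[[bogus]]]" and Loc points at offset 3, the line is echoed and a '^' is
// printed beneath that column on the following line.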
void MarkupFilter::reportLocation(StringRef::iterator Loc) const { errs() << Line; WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^'; errs() << '\n'; } // Checks for an existing mmap that overlaps the given one and returns a // pointer to one of them. -const MarkupFilter::MMap *MarkupFilter::overlappingMMap(const MMap &Map) const { +const MarkupFilter::MMap * +MarkupFilter::getOverlappingMMap(const MMap &Map) const { // If the given map contains the start of another mmap, they overlap. auto I = MMaps.upper_bound(Map.Addr); if (I != MMaps.end() && Map.contains(I->second.Addr)) return &I->second; // If no element starts inside the given mmap, the only possible overlap would // be if the preceding mmap contains the start point of the given mmap. if (I != MMaps.begin()) { --I; if (I->second.contains(Map.Addr)) return &I->second; } return nullptr; } +// Returns the MMap that contains the given address or nullptr if none. +const MarkupFilter::MMap *MarkupFilter::getContainingMMap(uint64_t Addr) const { + // Find the first mmap starting >= Addr. + auto I = MMaps.lower_bound(Addr); + if (I != MMaps.end() && I->second.contains(Addr)) + return &I->second; + + // The previous mmap is the last one starting < Addr. + if (I == MMaps.begin()) + return nullptr; + --I; + return I->second.contains(Addr) ? &I->second : nullptr; +} + +uint64_t MarkupFilter::adjustAddr(uint64_t Addr, PCType Type) const { + // Decrementing return addresses by one moves them into the call instruction. + // The address doesn't have to be the start of the call instruction, just some + // byte on the inside. Subtracting one avoids needing detailed instruction + // length information here. + return Type == MarkupFilter::PCType::ReturnAddress ? Addr - 1 : Addr; +} + StringRef MarkupFilter::lineEnding() const { return Line.endswith("\r\n") ? "\r\n" : "\n"; } bool MarkupFilter::MMap::contains(uint64_t Addr) const { return this->Addr <= Addr && Addr < this->Addr + Size; } + +// Returns the module-relative address for a given virtual address. +uint64_t MarkupFilter::MMap::getModuleRelativeAddr(uint64_t Addr) const { + return Addr - this->Addr + ModuleRelativeAddr; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index bf520a560404..c0a94cc758bb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1,4818 +1,4825 @@ //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// This is the parent TargetLowering class for hardware code gen /// targets. 
// //===----------------------------------------------------------------------===// #include "AMDGPUISelLowering.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUMachineFunction.h" #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; #include "AMDGPUGenCallingConv.inc" static cl::opt AMDGPUBypassSlowDiv( "amdgpu-bypass-slow-div", cl::desc("Skip 64-bit divide for dynamic 32-bit values"), cl::init(true)); // Find a larger type to do a load / store of a vector with. EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { unsigned StoreSize = VT.getStoreSizeInBits(); if (StoreSize <= 32) return EVT::getIntegerVT(Ctx, StoreSize); assert(StoreSize % 32 == 0 && "Store size not a multiple of 32"); return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); } unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { return DAG.computeKnownBits(Op).countMaxActiveBits(); } unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { // In order for this to be a signed 24-bit value, bit 23, must // be a sign bit. return DAG.ComputeMaxSignificantBits(Op); } AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { // Lower floating point store/load to integer store/load to reduce the number // of patterns in tablegen. setOperationAction(ISD::LOAD, MVT::f32, Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); setOperationAction(ISD::LOAD, MVT::v2f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); setOperationAction(ISD::LOAD, MVT::v3f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32); setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); setOperationAction(ISD::LOAD, MVT::v5f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32); setOperationAction(ISD::LOAD, MVT::v6f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32); setOperationAction(ISD::LOAD, MVT::v7f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32); setOperationAction(ISD::LOAD, MVT::v8f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32); setOperationAction(ISD::LOAD, MVT::v16f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); setOperationAction(ISD::LOAD, MVT::v32f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32); setOperationAction(ISD::LOAD, MVT::i64, Promote); AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32); setOperationAction(ISD::LOAD, MVT::f64, Promote); AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32); setOperationAction(ISD::LOAD, MVT::v2f64, Promote); AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32); setOperationAction(ISD::LOAD, MVT::v3i64, Promote); AddPromotedToType(ISD::LOAD, MVT::v3i64, MVT::v6i32); setOperationAction(ISD::LOAD, MVT::v4i64, Promote); AddPromotedToType(ISD::LOAD, MVT::v4i64, MVT::v8i32); setOperationAction(ISD::LOAD, MVT::v3f64, Promote); AddPromotedToType(ISD::LOAD, MVT::v3f64, MVT::v6i32); setOperationAction(ISD::LOAD, MVT::v4f64, Promote); AddPromotedToType(ISD::LOAD, 
MVT::v4f64, MVT::v8i32); setOperationAction(ISD::LOAD, MVT::v8i64, Promote); AddPromotedToType(ISD::LOAD, MVT::v8i64, MVT::v16i32); setOperationAction(ISD::LOAD, MVT::v8f64, Promote); AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32); setOperationAction(ISD::LOAD, MVT::v16i64, Promote); AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32); setOperationAction(ISD::LOAD, MVT::v16f64, Promote); AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32); // There are no 64-bit extloads. These should be done as a 32-bit extload and // an extension to 64-bit. for (MVT VT : MVT::integer_valuetypes()) setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, VT, Expand); for (MVT VT : MVT::integer_valuetypes()) { if (VT == MVT::i64) continue; for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) { setLoadExtAction(Op, VT, MVT::i1, Promote); setLoadExtAction(Op, VT, MVT::i8, Legal); setLoadExtAction(Op, VT, MVT::i16, Legal); setLoadExtAction(Op, VT, MVT::i32, Expand); } } for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) for (auto MemVT : {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16}) setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MemVT, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand); setOperationAction(ISD::STORE, MVT::f32, Promote); AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); setOperationAction(ISD::STORE, MVT::v2f32, Promote); AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); setOperationAction(ISD::STORE, MVT::v3f32, Promote); AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32); setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); setOperationAction(ISD::STORE, MVT::v5f32, Promote); AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32); setOperationAction(ISD::STORE, MVT::v6f32, Promote); AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32); setOperationAction(ISD::STORE, MVT::v7f32, Promote); AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32); setOperationAction(ISD::STORE, MVT::v8f32, Promote); AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32); setOperationAction(ISD::STORE, MVT::v16f32, Promote); AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32); setOperationAction(ISD::STORE, MVT::v32f32, Promote); AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32); setOperationAction(ISD::STORE, MVT::i64, 
Promote); AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); setOperationAction(ISD::STORE, MVT::v2i64, Promote); AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32); setOperationAction(ISD::STORE, MVT::f64, Promote); AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32); setOperationAction(ISD::STORE, MVT::v2f64, Promote); AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32); setOperationAction(ISD::STORE, MVT::v3i64, Promote); AddPromotedToType(ISD::STORE, MVT::v3i64, MVT::v6i32); setOperationAction(ISD::STORE, MVT::v3f64, Promote); AddPromotedToType(ISD::STORE, MVT::v3f64, MVT::v6i32); setOperationAction(ISD::STORE, MVT::v4i64, Promote); AddPromotedToType(ISD::STORE, MVT::v4i64, MVT::v8i32); setOperationAction(ISD::STORE, MVT::v4f64, Promote); AddPromotedToType(ISD::STORE, MVT::v4f64, MVT::v8i32); setOperationAction(ISD::STORE, MVT::v8i64, Promote); AddPromotedToType(ISD::STORE, MVT::v8i64, MVT::v16i32); setOperationAction(ISD::STORE, MVT::v8f64, Promote); AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32); setOperationAction(ISD::STORE, MVT::v16i64, Promote); AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32); setOperationAction(ISD::STORE, MVT::v16f64, Promote); AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32); setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i8, Expand); setTruncStoreAction(MVT::i64, MVT::i16, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand); setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand); setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand); setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand); setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand); setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand); setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand); setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand); setTruncStoreAction(MVT::v3f64, MVT::v3f32, Expand); setTruncStoreAction(MVT::v3f64, MVT::v3f16, Expand); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand); setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand); setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand); setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand); setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand); setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand); setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand); setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand); setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand); setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand); setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand); setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand); setOperationAction(ISD::Constant, {MVT::i32, MVT::i64}, Legal); setOperationAction(ISD::ConstantFP, {MVT::f32, MVT::f64}, Legal); setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand); // This is totally unsupported, just custom lower to produce an error. setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); // Library functions. 
These default to Expand, but we have instructions // for them. setOperationAction({ISD::FCEIL, ISD::FEXP2, ISD::FPOW, ISD::FLOG2, ISD::FABS, ISD::FFLOOR, ISD::FRINT, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal); setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom); setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP}, MVT::f32, Custom); setOperationAction(ISD::FNEARBYINT, {MVT::f32, MVT::f64}, Custom); setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); // Expand to fneg + fadd. setOperationAction(ISD::FSUB, MVT::f64, Expand); setOperationAction(ISD::CONCAT_VECTORS, {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32, MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32, MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32}, Custom); setOperationAction( ISD::EXTRACT_SUBVECTOR, {MVT::v2f16, MVT::v2i16, MVT::v4f16, MVT::v4i16, MVT::v2f32, MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32, MVT::v4i32, MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32, MVT::v7f32, MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v16f16, MVT::v16i16, MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64, MVT::v4i64, MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64}, Custom); setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { // These should use [SU]DIVREM, so set them to expand setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT, Expand); // GPU does not have divrem function for signed or unsigned. setOperationAction({ISD::SDIVREM, ISD::UDIVREM}, VT, Custom); // GPU does not have [S|U]MUL_LOHI functions as a single instruction. setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); setOperationAction({ISD::BSWAP, ISD::CTTZ, ISD::CTLZ}, VT, Expand); // AMDGPU uses ADDC/SUBC/ADDE/SUBE setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, Legal); } // The hardware supports 32-bit FSHR, but not FSHL. setOperationAction(ISD::FSHR, MVT::i32, Legal); // The hardware supports 32-bit ROTR, but not ROTL. setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand); setOperationAction(ISD::ROTR, MVT::i64, Expand); setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand); setOperationAction({ISD::MUL, ISD::MULHU, ISD::MULHS}, MVT::i64, Expand); setOperationAction( {ISD::UINT_TO_FP, ISD::SINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32, Legal); setOperationAction( {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i64, Custom); static const MVT::SimpleValueType VectorIntTypes[] = { MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32}; for (MVT VT : VectorIntTypes) { // Expand the following operations for the current type by default. 
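// (Illustrative note, not part of the original source: Expand here means the
//  generic legalizer decomposes the operation, e.g. a v2i32 ADD is unrolled
//  into scalar i32 ADDs instead of being matched by a native vector pattern.)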
setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU, ISD::MULHS, ISD::OR, ISD::SHL, ISD::SRA, ISD::SRL, ISD::ROTL, ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM, ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC, ISD::XOR, ISD::BSWAP, ISD::CTPOP, ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE, ISD::SETCC}, VT, Expand); } static const MVT::SimpleValueType FloatVectorTypes[] = { MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32}; for (MVT VT : FloatVectorTypes) { setOperationAction( {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2, ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG, ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT, ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG, ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, ISD::SETCC, ISD::FCANONICALIZE}, VT, Expand); } // This causes using an unrolled select operation rather than expansion with // bit operations. This is in general better, but the alternative using BFI // instructions may be better if the select sources are SGPRs. setOperationAction(ISD::SELECT, MVT::v2f32, Promote); AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32); setOperationAction(ISD::SELECT, MVT::v3f32, Promote); AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32); setOperationAction(ISD::SELECT, MVT::v4f32, Promote); AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32); setOperationAction(ISD::SELECT, MVT::v5f32, Promote); AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32); setOperationAction(ISD::SELECT, MVT::v6f32, Promote); AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32); setOperationAction(ISD::SELECT, MVT::v7f32, Promote); AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32); // There are no libcalls of any kind. for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) setLibcallName(static_cast(I), nullptr); setSchedulingPreference(Sched::RegPressure); setJumpIsExpensive(true); // FIXME: This is only partially true. If we have to do vector compares, any // SGPR pair can be a condition register. If we have a uniform condition, we // are better off doing SALU operations, where there is only one SCC. For now, // we don't have a way of knowing during instruction selection if a condition // will be uniform and we always use vector compares. Assume we are using // vector compares until that is fixed. setHasMultipleConditionRegisters(true); setMinCmpXchgSizeInBits(32); setSupportsUnalignedAtomics(false); PredictableSelectIsExpensive = false; // We want to find all load dependencies for long chains of stores to enable // merging into very wide vectors. The problem is with vectors with > 4 // elements. MergeConsecutiveStores will attempt to merge these because x8/x16 // vectors are a legal type, even though we have to split the loads // usually. When we can more precisely specify load legality per address // space, we should be able to make FindBetterChain/MergeConsecutiveStores // smarter so that they can figure out what to do in 2 iterations without all // N > 4 stores on the same chain. GatherAllAliasesMaxDepth = 16; // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry // about these during lowering. MaxStoresPerMemcpy = 0xffffffff; MaxStoresPerMemmove = 0xffffffff; MaxStoresPerMemset = 0xffffffff; // The expansion for 64-bit division is enormous. 
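  // addBypassSlowDiv(64, 32) makes CodeGenPrepare emit a runtime check so that
  // an i64 udiv/sdiv whose operands happen to fit in 32 bits is done with the
  // cheap 32-bit divide, and only otherwise falls through to the full 64-bit
  // expansion.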
if (AMDGPUBypassSlowDiv) addBypassSlowDiv(64, 32); setTargetDAGCombine({ISD::BITCAST, ISD::SHL, ISD::SRA, ISD::SRL, ISD::TRUNCATE, ISD::MUL, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHU, ISD::MULHS, ISD::SELECT, ISD::SELECT_CC, ISD::STORE, ISD::FADD, ISD::FSUB, ISD::FNEG, ISD::FABS, ISD::AssertZext, ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN}); } bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const { if (getTargetMachine().Options.NoSignedZerosFPMath) return true; const auto Flags = Op.getNode()->getFlags(); if (Flags.hasNoSignedZeros()) return true; return false; } //===----------------------------------------------------------------------===// // Target Information //===----------------------------------------------------------------------===// LLVM_READNONE static bool fnegFoldsIntoOp(unsigned Opc) { switch (Opc) { case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FMA: case ISD::FMAD: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINNUM_IEEE: case ISD::FMAXNUM_IEEE: case ISD::FSIN: case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FCANONICALIZE: case AMDGPUISD::RCP: case AMDGPUISD::RCP_LEGACY: case AMDGPUISD::RCP_IFLAG: case AMDGPUISD::SIN_HW: case AMDGPUISD::FMUL_LEGACY: case AMDGPUISD::FMIN_LEGACY: case AMDGPUISD::FMAX_LEGACY: case AMDGPUISD::FMED3: // TODO: handle llvm.amdgcn.fma.legacy return true; default: return false; } } /// \p returns true if the operation will definitely need to use a 64-bit /// encoding, and thus will use a VOP3 encoding regardless of the source /// modifiers. LLVM_READONLY static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) { return N->getNumOperands() > 2 || VT == MVT::f64; } // Most FP instructions support source modifiers, but this could be refined // slightly. LLVM_READONLY static bool hasSourceMods(const SDNode *N) { if (isa(N)) return false; switch (N->getOpcode()) { case ISD::CopyToReg: case ISD::SELECT: case ISD::FDIV: case ISD::FREM: case ISD::INLINEASM: case ISD::INLINEASM_BR: case AMDGPUISD::DIV_SCALE: case ISD::INTRINSIC_W_CHAIN: // TODO: Should really be looking at the users of the bitcast. These are // problematic because bitcasts are used to legalize all stores to integer // types. case ISD::BITCAST: return false; case ISD::INTRINSIC_WO_CHAIN: { switch (cast(N->getOperand(0))->getZExtValue()) { case Intrinsic::amdgcn_interp_p1: case Intrinsic::amdgcn_interp_p2: case Intrinsic::amdgcn_interp_mov: case Intrinsic::amdgcn_interp_p1_f16: case Intrinsic::amdgcn_interp_p2_f16: return false; default: return true; } } default: return true; } } bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold) { // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus // it is truly free to use a source modifier in all cases. If there are // multiple users but for each one will necessitate using VOP3, there will be // a code size increase. Try to avoid increasing code size unless we know it // will save on the instruction count. unsigned NumMayIncreaseSize = 0; MVT VT = N->getValueType(0).getScalarType().getSimpleVT(); // XXX - Should this limit number of uses to check? for (const SDNode *U : N->uses()) { if (!hasSourceMods(U)) return false; if (!opMustUseVOP3Encoding(U, VT)) { if (++NumMayIncreaseSize > CostThreshold) return false; } } return true; } EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const { assert(!VT.isVector() && "only scalar expected"); // Round to the next multiple of 32-bits. 
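  // e.g. an i8 or i16 ext-return is widened to i32, an i48 one to i64
  // (32 * ceil(48 / 32) == 64), while i96 already meets the constraint and is
  // returned unchanged.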
unsigned Size = VT.getSizeInBits(); if (Size <= 32) return MVT::i32; return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32)); } MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { return MVT::i32; } bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const { return true; } // The backend supports 32 and 64 bit floating point immediates. // FIXME: Why are we reporting vectors of FP immediates as legal? bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { EVT ScalarVT = VT.getScalarType(); return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 || (ScalarVT == MVT::f16 && Subtarget->has16BitInsts())); } // We don't want to shrink f64 / f32 constants. bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { EVT ScalarVT = VT.getScalarType(); return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64); } bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N, ISD::LoadExtType ExtTy, EVT NewVT) const { // TODO: This may be worth removing. Check regression tests for diffs. if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT)) return false; unsigned NewSize = NewVT.getStoreSizeInBits(); // If we are reducing to a 32-bit load or a smaller multi-dword load, // this is always better. if (NewSize >= 32) return true; EVT OldVT = N->getValueType(0); unsigned OldSize = OldVT.getStoreSizeInBits(); MemSDNode *MN = cast(N); unsigned AS = MN->getAddressSpace(); // Do not shrink an aligned scalar load to sub-dword. // Scalar engine cannot do sub-dword loads. if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) && (AS == AMDGPUAS::CONSTANT_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || (isa(N) && AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) && AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand())) return false; // Don't produce extloads from sub 32-bit types. SI doesn't have scalar // extloads, so doing one requires using a buffer_load. In cases where we // still couldn't use a scalar load, using the wider load shouldn't really // hurt anything. // If the old size already had to be an extload, there's no harm in continuing // to reduce the width. return (OldSize < 32); } bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy, const SelectionDAG &DAG, const MachineMemOperand &MMO) const { assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits()); if (LoadTy.getScalarType() == MVT::i32) return false; unsigned LScalarSize = LoadTy.getScalarSizeInBits(); unsigned CastScalarSize = CastTy.getScalarSizeInBits(); if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32)) return false; bool Fast = false; return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), CastTy, MMO, &Fast) && Fast; } // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also // profitable with the expansion for 64-bit since it's generally good to // speculate things. // FIXME: These should really have the size as a parameter. 
bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const { return true; } bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const { return true; } bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { switch (N->getOpcode()) { case ISD::EntryToken: case ISD::TokenFactor: return true; case ISD::INTRINSIC_WO_CHAIN: { unsigned IntrID = cast(N->getOperand(0))->getZExtValue(); switch (IntrID) { case Intrinsic::amdgcn_readfirstlane: case Intrinsic::amdgcn_readlane: return true; } return false; } case ISD::LOAD: if (cast(N)->getMemOperand()->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) return true; return false; case AMDGPUISD::SETCC: // ballot-style instruction return true; } return false; } SDValue AMDGPUTargetLowering::getNegatedExpression( SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, NegatibleCost &Cost, unsigned Depth) const { switch (Op.getOpcode()) { case ISD::FMA: case ISD::FMAD: { // Negating a fma is not free if it has users without source mods. if (!allUsesHaveSourceMods(Op.getNode())) return SDValue(); break; } default: break; } return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, ForCodeSize, Cost, Depth); } //===---------------------------------------------------------------------===// // Target Properties //===---------------------------------------------------------------------===// bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { assert(VT.isFloatingPoint()); // Packed operations do not have a fabs modifier. return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() && VT == MVT::f16); } bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { assert(VT.isFloatingPoint()); // Report this based on the end legalized type. VT = VT.getScalarType(); return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16; } bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AS) const { return true; } bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const { // There are few operations which truly have vector input operands. Any vector // operation is going to involve operations on each component, and a // build_vector will be a copy per element, so it always makes sense to use a // build_vector input in place of the extracted element to avoid a copy into a // super register. // // We should probably only do this if all users are extracts only, but this // should be the common case. return true; } bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { // Truncate is just accessing a subregister. unsigned SrcSize = Source.getSizeInBits(); unsigned DestSize = Dest.getSizeInBits(); return DestSize < SrcSize && DestSize % 32 == 0 ; } bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { // Truncate is just accessing a subregister. unsigned SrcSize = Source->getScalarSizeInBits(); unsigned DestSize = Dest->getScalarSizeInBits(); if (DestSize== 16 && Subtarget->has16BitInsts()) return SrcSize >= 32; return DestSize < SrcSize && DestSize % 32 == 0; } bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { unsigned SrcSize = Src->getScalarSizeInBits(); unsigned DestSize = Dest->getScalarSizeInBits(); if (SrcSize == 16 && Subtarget->has16BitInsts()) return DestSize >= 32; return SrcSize == 32 && DestSize == 64; } bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const { // Any register load of a 64-bit value really requires 2 32-bit moves. 
For all // practical purposes, the extra mov 0 to load a 64-bit is free. As used, // this will enable reducing 64-bit operations the 32-bit, which is always // good. if (Src == MVT::i16) return Dest == MVT::i32 ||Dest == MVT::i64 ; return Src == MVT::i32 && Dest == MVT::i64; } bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return isZExtFree(Val.getValueType(), VT2); } bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { // There aren't really 64-bit registers, but pairs of 32-bit ones and only a // limited number of native 64-bit operations. Shrinking an operation to fit // in a single 32-bit register should always be helpful. As currently used, // this is much less general than the name suggests, and is only used in // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is // not profitable, and may actually be harmful. return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32; } //===---------------------------------------------------------------------===// // TargetLowering Callbacks //===---------------------------------------------------------------------===// CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) { switch (CC) { case CallingConv::AMDGPU_VS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_LS: return CC_AMDGPU; case CallingConv::C: case CallingConv::Fast: case CallingConv::Cold: return CC_AMDGPU_Func; case CallingConv::AMDGPU_Gfx: return CC_SI_Gfx; case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: default: report_fatal_error("Unsupported calling convention for call"); } } CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) { switch (CC) { case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: llvm_unreachable("kernels should not be handled here"); case CallingConv::AMDGPU_VS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_LS: return RetCC_SI_Shader; case CallingConv::AMDGPU_Gfx: return RetCC_SI_Gfx; case CallingConv::C: case CallingConv::Fast: case CallingConv::Cold: return RetCC_AMDGPU_Func; default: report_fatal_error("Unsupported calling convention."); } } /// The SelectionDAGBuilder will automatically promote function arguments /// with illegal types. However, this does not work for the AMDGPU targets /// since the function arguments are stored in memory as these illegal types. /// In order to handle this properly we need to get the original types sizes /// from the LLVM IR Function and fixup the ISD:InputArg values before /// passing them to AnalyzeFormalArguments() /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting /// input values across multiple registers. Each item in the Ins array /// represents a single value that will be stored in registers. Ins[x].VT is /// the value type of the value that will be stored in the register, so /// whatever SDNode we lower the argument to needs to be this type. /// /// In order to correctly lower the arguments we need to know the size of each /// argument. Since Ins[x].VT gives us the size of the register that will /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type /// for the original function argument so that we can deduce the correct memory /// type to use for Ins[x]. 
In most cases the correct memory type will be /// Ins[x].ArgVT. However, this will not always be the case. If, for example, /// we have a kernel argument of type v8i8, this argument will be split into /// 8 parts and each part will be represented by its own item in the Ins array. /// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of /// the argument before it was split. From this, we deduce that the memory type /// for each individual part is i8. We pass the memory type as LocVT to the /// calling convention analysis function and the register type (Ins[x].VT) as /// the ValVT. void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( CCState &State, const SmallVectorImpl &Ins) const { const MachineFunction &MF = State.getMachineFunction(); const Function &Fn = MF.getFunction(); LLVMContext &Ctx = Fn.getParent()->getContext(); const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn); CallingConv::ID CC = Fn.getCallingConv(); Align MaxAlign = Align(1); uint64_t ExplicitArgOffset = 0; const DataLayout &DL = Fn.getParent()->getDataLayout(); unsigned InIndex = 0; for (const Argument &Arg : Fn.args()) { const bool IsByRef = Arg.hasByRefAttr(); Type *BaseArgTy = Arg.getType(); Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy; Align Alignment = DL.getValueOrABITypeAlignment( IsByRef ? Arg.getParamAlign() : None, MemArgTy); MaxAlign = std::max(Alignment, MaxAlign); uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy); uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset; ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize; // We're basically throwing away everything passed into us and starting over // to get accurate in-memory offsets. The "PartOffset" is completely useless // to us as computed in Ins. // // We also need to figure out what type legalization is trying to do to get // the correct memory offsets. SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { uint64_t BasePartOffset = Offsets[Value]; EVT ArgVT = ValueVTs[Value]; EVT MemVT = ArgVT; MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT); unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT); if (NumRegs == 1) { // This argument is not split, so the IR type is the memory type. if (ArgVT.isExtended()) { // We have an extended type, like i24, so we should just use the // register type. MemVT = RegisterVT; } else { MemVT = ArgVT; } } else if (ArgVT.isVector() && RegisterVT.isVector() && ArgVT.getScalarType() == RegisterVT.getScalarType()) { assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements()); // We have a vector value which has been split into a vector with // the same scalar type, but fewer elements. This should handle // all the floating-point vector types. MemVT = RegisterVT; } else if (ArgVT.isVector() && ArgVT.getVectorNumElements() == NumRegs) { // This arg has been split so that each element is stored in a separate // register. MemVT = ArgVT.getScalarType(); } else if (ArgVT.isExtended()) { // We have an extended type, like i65. 
MemVT = RegisterVT; } else { unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs; assert(ArgVT.getStoreSizeInBits() % NumRegs == 0); if (RegisterVT.isInteger()) { MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits); } else if (RegisterVT.isVector()) { assert(!RegisterVT.getScalarType().isFloatingPoint()); unsigned NumElements = RegisterVT.getVectorNumElements(); assert(MemoryBits % NumElements == 0); // This vector type has been split into another vector type with // a different elements size. EVT ScalarVT = EVT::getIntegerVT(State.getContext(), MemoryBits / NumElements); MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements); } else { llvm_unreachable("cannot deduce memory type."); } } // Convert one element vectors to scalar. if (MemVT.isVector() && MemVT.getVectorNumElements() == 1) MemVT = MemVT.getScalarType(); // Round up vec3/vec5 argument. if (MemVT.isVector() && !MemVT.isPow2VectorType()) { assert(MemVT.getVectorNumElements() == 3 || MemVT.getVectorNumElements() == 5); MemVT = MemVT.getPow2VectorType(State.getContext()); } else if (!MemVT.isSimple() && !MemVT.isVector()) { MemVT = MemVT.getRoundIntegerType(State.getContext()); } unsigned PartOffset = 0; for (unsigned i = 0; i != NumRegs; ++i) { State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT, BasePartOffset + PartOffset, MemVT.getSimpleVT(), CCValAssign::Full)); PartOffset += MemVT.getStoreSize(); } } } } SDValue AMDGPUTargetLowering::LowerReturn( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { // FIXME: Fails for r600 tests //assert(!isVarArg && Outs.empty() && OutVals.empty() && // "wave terminate should not have return values"); return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain); } //===---------------------------------------------------------------------===// // Target specific lowering //===---------------------------------------------------------------------===// /// Selects the correct CCAssignFn for a given CallingConvention value. CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) { return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg); } CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) { return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg); } SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const { SmallVector ArgChains; int64_t FirstByte = MFI.getObjectOffset(ClobberedFI); int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1; // Include the original chain at the beginning of the list. When this is // used by target LowerCall hooks, this helps legalize find the // CALLSEQ_BEGIN node. ArgChains.push_back(Chain); // Add a chain value for each stack argument corresponding for (SDNode *U : DAG.getEntryNode().getNode()->uses()) { if (LoadSDNode *L = dyn_cast(U)) { if (FrameIndexSDNode *FI = dyn_cast(L->getBasePtr())) { if (FI->getIndex() < 0) { int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex()); int64_t InLastByte = InFirstByte; InLastByte += MFI.getObjectSize(FI->getIndex()) - 1; if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || (FirstByte <= InFirstByte && InFirstByte <= LastByte)) ArgChains.push_back(SDValue(L, 1)); } } } } // Build a tokenfactor for all the chains. 
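  // The caller uses this TokenFactor as the chain of the store that writes the
  // outgoing argument, so that store is ordered after every load still reading
  // the overlapping incoming-argument stack slot.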
return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); } SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals, StringRef Reason) const { SDValue Callee = CLI.Callee; SelectionDAG &DAG = CLI.DAG; const Function &Fn = DAG.getMachineFunction().getFunction(); StringRef FuncName(""); if (const ExternalSymbolSDNode *G = dyn_cast(Callee)) FuncName = G->getSymbol(); else if (const GlobalAddressSDNode *G = dyn_cast(Callee)) FuncName = G->getGlobal()->getName(); DiagnosticInfoUnsupported NoCalls( Fn, Reason + FuncName, CLI.DL.getDebugLoc()); DAG.getContext()->diagnose(NoCalls); if (!CLI.IsTailCall) { for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I) InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT)); } return DAG.getEntryNode(); } SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { return lowerUnhandledCall(CLI, InVals, "unsupported call to function "); } SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { const Function &Fn = DAG.getMachineFunction().getFunction(); DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca", SDLoc(Op).getDebugLoc()); DAG.getContext()->diagnose(NoDynamicAlloca); auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)}; return DAG.getMergeValues(Ops, SDLoc()); } SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: Op->print(errs(), &DAG); llvm_unreachable("Custom lowering code for this " "instruction is not implemented yet!"); break; case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); case ISD::SDIVREM: return LowerSDIVREM(Op, DAG); case ISD::FREM: return LowerFREM(Op, DAG); case ISD::FCEIL: return LowerFCEIL(Op, DAG); case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); case ISD::FRINT: return LowerFRINT(Op, DAG); case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); case ISD::FROUND: return LowerFROUND(Op, DAG); case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::FLOG: return LowerFLOG(Op, DAG, numbers::ln2f); case ISD::FLOG10: return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f); case ISD::FEXP: return lowerFEXP(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_CTTZ(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); } return Op; } void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { case ISD::SIGN_EXTEND_INREG: // Different parts of legalization seem to interpret which type of // sign_extend_inreg is the one to check for custom lowering. The extended // from type is what really matters, but some places check for custom // lowering of the result type. This results in trying to use // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do // nothing here and let the illegal result integer be handled normally. 
return; default: return; } } SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, SDValue Op, SelectionDAG &DAG) const { const DataLayout &DL = DAG.getDataLayout(); GlobalAddressSDNode *G = cast(Op); const GlobalValue *GV = G->getGlobal(); if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) { if (!MFI->isModuleEntryFunction() && !GV->getName().equals("llvm.amdgcn.module.lds")) { SDLoc DL(Op); const Function &Fn = DAG.getMachineFunction().getFunction(); DiagnosticInfoUnsupported BadLDSDecl( Fn, "local memory global used by non-kernel function", DL.getDebugLoc(), DS_Warning); DAG.getContext()->diagnose(BadLDSDecl); // We currently don't have a way to correctly allocate LDS objects that // aren't directly associated with a kernel. We do force inlining of // functions that use local objects. However, if these dead functions are // not eliminated, we don't want a compile time error. Just emit a warning // and a trap, since there should be no callable path here. SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode()); SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Trap, DAG.getRoot()); DAG.setRoot(OutputChain); return DAG.getUNDEF(Op.getValueType()); } // XXX: What does the value of G->getOffset() mean? assert(G->getOffset() == 0 && "Do not know what to do with an non-zero offset"); // TODO: We could emit code to handle the initialization somewhere. // We ignore the initializer for now and legalize it to allow selection. // The initializer will anyway get errored out during assembly emission. unsigned Offset = MFI->allocateLDSGlobal(DL, *cast(GV)); return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); } return SDValue(); } SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SmallVector Args; EVT VT = Op.getValueType(); if (VT == MVT::v4i16 || VT == MVT::v4f16) { SDLoc SL(Op); SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(0)); SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(1)); SDValue BV = DAG.getBuildVector(MVT::v2i32, SL, { Lo, Hi }); return DAG.getNode(ISD::BITCAST, SL, VT, BV); } for (const SDUse &U : Op->ops()) DAG.ExtractVectorElements(U.get(), Args); return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args); } SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SmallVector Args; unsigned Start = cast(Op.getOperand(1))->getZExtValue(); EVT VT = Op.getValueType(); EVT SrcVT = Op.getOperand(0).getValueType(); // For these types, we have some TableGen patterns except if the index is 1 if (((SrcVT == MVT::v4f16 && VT == MVT::v2f16) || (SrcVT == MVT::v4i16 && VT == MVT::v2i16)) && Start != 1) return Op; if (((SrcVT == MVT::v8f16 && VT == MVT::v4f16) || (SrcVT == MVT::v8i16 && VT == MVT::v4i16)) && (Start == 0 || Start == 4)) return Op; if (((SrcVT == MVT::v16f16 && VT == MVT::v8f16) || (SrcVT == MVT::v16i16 && VT == MVT::v8i16)) && (Start == 0 || Start == 8)) return Op; DAG.ExtractVectorElements(Op.getOperand(0), Args, Start, VT.getVectorNumElements()); return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args); } /// Generate Min/Max node SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const { if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); SelectionDAG &DAG = DCI.DAG; ISD::CondCode CCOpcode = 
cast(CC)->get(); switch (CCOpcode) { case ISD::SETOEQ: case ISD::SETONE: case ISD::SETUNE: case ISD::SETNE: case ISD::SETUEQ: case ISD::SETEQ: case ISD::SETFALSE: case ISD::SETFALSE2: case ISD::SETTRUE: case ISD::SETTRUE2: case ISD::SETUO: case ISD::SETO: break; case ISD::SETULE: case ISD::SETULT: { if (LHS == True) return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); } case ISD::SETOLE: case ISD::SETOLT: case ISD::SETLE: case ISD::SETLT: { // Ordered. Assume ordered for undefined. // Only do this after legalization to avoid interfering with other combines // which might occur. if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && !DCI.isCalledByLegalizer()) return SDValue(); // We need to permute the operands to get the correct NaN behavior. The // selected operand is the second one based on the failing compare with NaN, // so permute it based on the compare type the hardware uses. if (LHS == True) return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); } case ISD::SETUGE: case ISD::SETUGT: { if (LHS == True) return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); } case ISD::SETGT: case ISD::SETGE: case ISD::SETOGE: case ISD::SETOGT: { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && !DCI.isCalledByLegalizer()) return SDValue(); if (LHS == True) return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); } case ISD::SETCC_INVALID: llvm_unreachable("Invalid setcc condcode!"); } return SDValue(); } std::pair AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); const SDValue One = DAG.getConstant(1, SL, MVT::i32); SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); return std::make_pair(Lo, Hi); } SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); } SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); const SDValue One = DAG.getConstant(1, SL, MVT::i32); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); } // Split a vector type into two parts. The first part is a power of two vector. // The second part is whatever is left over, and is a scalar if it would // otherwise be a 1-vector. std::pair AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const { EVT LoVT, HiVT; EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2); LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts); HiVT = NumElts - LoNumElts == 1 ? EltVT : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts); return std::make_pair(LoVT, HiVT); } // Split a vector value into two parts of types LoVT and HiVT. HiVT could be // scalar. 
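// For example, a v7i32 value splits into a v4i32 part and a v3i32 part, and a
// v3f32 value splits into a v2f32 part plus a scalar f32 that is extracted
// with EXTRACT_VECTOR_ELT.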
std::pair AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT, SelectionDAG &DAG) const { assert(LoVT.getVectorNumElements() + (HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <= N.getValueType().getVectorNumElements() && "More vector elements requested than available!"); SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, DAG.getVectorIdxConstant(0, DL)); SDValue Hi = DAG.getNode( HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL, HiVT, N, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL)); return std::make_pair(Lo, Hi); } SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, SelectionDAG &DAG) const { LoadSDNode *Load = cast(Op); EVT VT = Op.getValueType(); SDLoc SL(Op); // If this is a 2 element vector, we really want to scalarize and not create // weird 1 element vectors. if (VT.getVectorNumElements() == 2) { SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG); return DAG.getMergeValues(Ops, SL); } SDValue BasePtr = Load->getBasePtr(); EVT MemVT = Load->getMemoryVT(); const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); EVT LoVT, HiVT; EVT LoMemVT, HiMemVT; SDValue Lo, Hi; std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG); std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG); std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG); unsigned Size = LoMemVT.getStoreSize(); Align BaseAlign = Load->getAlign(); Align HiAlign = commonAlignment(BaseAlign, Size); SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue, LoMemVT, BaseAlign, Load->getMemOperand()->getFlags()); SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Size)); SDValue HiLoad = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), HiMemVT, HiAlign, Load->getMemOperand()->getFlags()); SDValue Join; if (LoVT == HiVT) { // This is the case that the vector is power of two so was evenly split. Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad); } else { Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad, DAG.getVectorIdxConstant(0, SL)); Join = DAG.getNode( HiVT.isVector() ? ISD::INSERT_SUBVECTOR : ISD::INSERT_VECTOR_ELT, SL, VT, Join, HiLoad, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), SL)); } SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoLoad.getValue(1), HiLoad.getValue(1))}; return DAG.getMergeValues(Ops, SL); } SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *Load = cast(Op); EVT VT = Op.getValueType(); SDValue BasePtr = Load->getBasePtr(); EVT MemVT = Load->getMemoryVT(); SDLoc SL(Op); const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); Align BaseAlign = Load->getAlign(); unsigned NumElements = MemVT.getVectorNumElements(); // Widen from vec3 to vec4 when the load is at least 8-byte aligned // or 16-byte fully dereferenceable. Otherwise, split the vector load. 
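  // e.g. an 8-byte-aligned v3i32 load is widened to a v4i32 load and the first
  // three lanes are taken back with EXTRACT_SUBVECTOR; otherwise
  // SplitVectorLoad breaks it into a v2i32 part and a scalar i32 part.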
if (NumElements != 3 || (BaseAlign < Align(8) && !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout()))) return SplitVectorLoad(Op, DAG); assert(NumElements == 3); EVT WideVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4); EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4); SDValue WideLoad = DAG.getExtLoad( Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue, WideMemVT, BaseAlign, Load->getMemOperand()->getFlags()); return DAG.getMergeValues( {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad, DAG.getVectorIdxConstant(0, SL)), WideLoad.getValue(1)}, SL); } SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *Store = cast(Op); SDValue Val = Store->getValue(); EVT VT = Val.getValueType(); // If this is a 2 element vector, we really want to scalarize and not create // weird 1 element vectors. if (VT.getVectorNumElements() == 2) return scalarizeVectorStore(Store, DAG); EVT MemVT = Store->getMemoryVT(); SDValue Chain = Store->getChain(); SDValue BasePtr = Store->getBasePtr(); SDLoc SL(Op); EVT LoVT, HiVT; EVT LoMemVT, HiMemVT; SDValue Lo, Hi; std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG); std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG); std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG); SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize()); const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo(); Align BaseAlign = Store->getAlign(); unsigned Size = LoMemVT.getStoreSize(); Align HiAlign = commonAlignment(BaseAlign, Size); SDValue LoStore = DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign, Store->getMemOperand()->getFlags()); SDValue HiStore = DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size), HiMemVT, HiAlign, Store->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore); } // This is a shortcut for integer division because we have fast i32<->f32 // conversions, and fast f32 reciprocal instructions. The fractional part of a // float is enough to accurately represent up to a 24-bit signed integer. SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool Sign) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); MVT IntVT = MVT::i32; MVT FltVT = MVT::f32; unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS); if (LHSSignBits < 9) return SDValue(); unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS); if (RHSSignBits < 9) return SDValue(); unsigned BitSize = VT.getSizeInBits(); unsigned SignBits = std::min(LHSSignBits, RHSSignBits); unsigned DivBits = BitSize - SignBits; if (Sign) ++DivBits; ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; ISD::NodeType ToInt = Sign ? 
      ISD::FP_TO_SINT : ISD::FP_TO_UINT;
  SDValue jq = DAG.getConstant(1, DL, IntVT);
  if (Sign) {
    // char|short jq = ia ^ ib;
    jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
    // jq = jq >> (bitsize - 2)
    jq = DAG.getNode(ISD::SRA, DL, VT, jq,
                     DAG.getConstant(BitSize - 2, DL, VT));
    // jq = jq | 0x1
    jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
  }
  // int ia = (int)LHS;
  SDValue ia = LHS;
  // int ib = (int)RHS;
  SDValue ib = RHS;
  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
  SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT, fa,
                           DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
  // float fr = mad(fqneg, fb, fa);
  unsigned OpCode = !Subtarget->hasMadMacF32Insts()
                        ? (unsigned)ISD::FMA
                    : !MFI->getMode().allFP32Denormals()
                        ? (unsigned)ISD::FMAD
                        : (unsigned)AMDGPUISD::FMAD_FTZ;
  SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
  // dst = iq + jq;
  SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
  // Rem needs compensation, it's easier to recompute it
  SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
  Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
  // Truncate to number of bits this divide really is.
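  // e.g. if ComputeNumSignBits reports at least 17 sign bits on both i32
  // operands (they fit in 16 bits), DivBits is 15 for the unsigned case and 16
  // for the signed case, so the results are masked with 0x7fff or
  // sign-extended in-reg from i16.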
if (Sign) { SDValue InRegSize = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits)); Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize); Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize); } else { SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT); Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask); Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask); } return DAG.getMergeValues({ Div, Rem }, DL); } void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl &Results) const { SDLoc DL(Op); EVT VT = Op.getValueType(); assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64"); EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); SDValue One = DAG.getConstant(1, DL, HalfVT); SDValue Zero = DAG.getConstant(0, DL, HalfVT); //HiLo split SDValue LHS = Op.getOperand(0); SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, One); SDValue RHS = Op.getOperand(1); SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, One); if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) && DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) { SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), LHS_Lo, RHS_Lo); SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero}); SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero}); Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV)); Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM)); return; } if (isTypeLegal(MVT::i64)) { // The algorithm here is based on ideas from "Software Integer Division", // Tom Rodeheffer, August 2008. MachineFunction &MF = DAG.getMachineFunction(); const SIMachineFunctionInfo *MFI = MF.getInfo(); // Compute denominator reciprocal. unsigned FMAD = !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA : !MFI->getMode().allFP32Denormals() ? (unsigned)ISD::FMAD : (unsigned)AMDGPUISD::FMAD_FTZ; SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo); SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi); SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi, DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32), Cvt_Lo); SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1); SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp, DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32)); SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1, DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32)); SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2); SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc, DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32), Mul1); SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2); SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc); SDValue Rcp64 = DAG.getBitcast(VT, DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi})); SDValue Zero64 = DAG.getConstant(0, DL, VT); SDValue One64 = DAG.getConstant(1, DL, VT); SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1); SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1); // First round of UNR (Unsigned integer Newton-Raphson). 
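    // Each UNR round refines Rcp64, the ~2^64 / RHS estimate: in real
    // arithmetic the step is X' = X + X * (1 - RHS * X), realized below in
    // fixed point as X + mulhu(X, 0 - RHS * X) with a 32-bit carry chain for
    // the add.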
SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS); SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64); SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1); SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, Zero); SDValue Mulhi1_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, One); SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo, Mulhi1_Lo, Zero1); SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi, Mulhi1_Hi, Add1_Lo.getValue(1)); SDValue Add1 = DAG.getBitcast(VT, DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi})); // Second round of UNR. SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1); SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2); SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, Zero); SDValue Mulhi2_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, One); SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo, Mulhi2_Lo, Zero1); SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Hi, Mulhi2_Hi, Add2_Lo.getValue(1)); SDValue Add2 = DAG.getBitcast(VT, DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi})); SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2); SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3); SDValue Mul3_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, Zero); SDValue Mul3_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, One); SDValue Sub1_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Lo, Mul3_Lo, Zero1); SDValue Sub1_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Hi, Mul3_Hi, Sub1_Lo.getValue(1)); SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi); SDValue Sub1 = DAG.getBitcast(VT, DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi})); SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT); SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero, ISD::SETUGE); SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero, ISD::SETUGE); SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ); // TODO: Here and below portions of the code can be enclosed into if/endif. // Currently control flow is unconditional and we have 4 selects after // potential endif to substitute PHIs. // if C3 != 0 ... 
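    // C1/C2/C3 above implement the unsigned 64-bit comparison Sub1 >= RHS from
    // the 32-bit halves: the high words decide unless they are equal, in which
    // case the low-word comparison is used.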
SDValue Sub2_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Lo, RHS_Lo, Zero1); SDValue Sub2_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Mi, RHS_Hi, Sub1_Lo.getValue(1)); SDValue Sub2_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi, Zero, Sub2_Lo.getValue(1)); SDValue Sub2 = DAG.getBitcast(VT, DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi})); SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64); SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero, ISD::SETUGE); SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero, ISD::SETUGE); SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ); // if (C6 != 0) SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64); SDValue Sub3_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Lo, RHS_Lo, Zero1); SDValue Sub3_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi, RHS_Hi, Sub2_Lo.getValue(1)); SDValue Sub3_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub3_Mi, Zero, Sub3_Lo.getValue(1)); SDValue Sub3 = DAG.getBitcast(VT, DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi})); // endif C6 // endif C3 SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE); SDValue Div = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE); SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE); SDValue Rem = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE); Results.push_back(Div); Results.push_back(Rem); return; } // r600 expandion. // Get Speculative values SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ); SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero}); REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM); SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ); SDValue DIV_Lo = Zero; const unsigned halfBitWidth = HalfVT.getSizeInBits(); for (unsigned i = 0; i < halfBitWidth; ++i) { const unsigned bitPos = halfBitWidth - i - 1; SDValue POS = DAG.getConstant(bitPos, DL, HalfVT); // Get value of high bit SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One); HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit); // Shift REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT)); // Add LHS high bit REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit); SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT); SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE); DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); // Update REM SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE); } SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi}); DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV); Results.push_back(DIV); Results.push_back(REM); } SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); if (VT == MVT::i64) { SmallVector Results; LowerUDIVREM64(Op, DAG, Results); return DAG.getMergeValues(Results, DL); } if (VT == MVT::i32) { if (SDValue Res = LowerDIVREM24(Op, DAG, false)) return Res; } SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the // algorithm used here. // Initial estimate of inv(y). 
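  // Roughly: the hardware reciprocal gives Z ~= 2^32 / Y, one Newton-Raphson
  // round sharpens it, then Q = mulhu(X, Z) and R = X - Q * Y; the two
  // refinement steps below correct a quotient estimate that may still be short
  // by up to 2.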
SDValue Z = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Y); // One round of UNR. SDValue NegY = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Y); SDValue NegYZ = DAG.getNode(ISD::MUL, DL, VT, NegY, Z); Z = DAG.getNode(ISD::ADD, DL, VT, Z, DAG.getNode(ISD::MULHU, DL, VT, Z, NegYZ)); // Quotient/remainder estimate. SDValue Q = DAG.getNode(ISD::MULHU, DL, VT, X, Z); SDValue R = DAG.getNode(ISD::SUB, DL, VT, X, DAG.getNode(ISD::MUL, DL, VT, Q, Y)); // First quotient/remainder refinement. EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue One = DAG.getConstant(1, DL, VT); SDValue Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE); Q = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getNode(ISD::ADD, DL, VT, Q, One), Q); R = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getNode(ISD::SUB, DL, VT, R, Y), R); // Second quotient/remainder refinement. Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE); Q = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getNode(ISD::ADD, DL, VT, Q, One), Q); R = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getNode(ISD::SUB, DL, VT, R, Y), R); return DAG.getMergeValues({Q, R}, DL); } SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue NegOne = DAG.getConstant(-1, DL, VT); if (VT == MVT::i32) { if (SDValue Res = LowerDIVREM24(Op, DAG, true)) return Res; } if (VT == MVT::i64 && DAG.ComputeNumSignBits(LHS) > 32 && DAG.ComputeNumSignBits(RHS) > 32) { EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); //HiLo split SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), LHS_Lo, RHS_Lo); SDValue Res[2] = { DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)), DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1)) }; return DAG.getMergeValues(Res, DL); } SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT); SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT); SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign); SDValue RSign = LHSign; // Remainder sign is the same as LHS LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign); RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign); LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign); RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign); SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS); SDValue Rem = Div.getValue(1); Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign); Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign); Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign); Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign); SDValue Res[2] = { Div, Rem }; return DAG.getMergeValues(Res, DL); } // (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x) SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); EVT VT = Op.getValueType(); auto Flags = Op->getFlags(); SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags); SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags); SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags); // TODO: For f32 use FMAD instead if !hasFastFMA32? 
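  // e.g. frem(5.5, 2.0): fdiv gives 2.75, ftrunc gives 2.0, and
  // fma(-2.0, 2.0, 5.5) = 1.5, matching fmod.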
return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags); } SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); // result = trunc(src) // if (src > 0.0 && src != result) // result += 1.0 SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT); SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, SelectionDAG &DAG) { const unsigned FractBits = 52; const unsigned ExpBits = 11; SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32, Hi, DAG.getConstant(FractBits - 32, SL, MVT::i32), DAG.getConstant(ExpBits, SL, MVT::i32)); SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart, DAG.getConstant(1023, SL, MVT::i32)); return Exp; } SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); assert(Op.getValueType() == MVT::f64); const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); // Extract the upper half, since this is where we will find the sign and // exponent. SDValue Hi = getHiHalf64(Src, DAG); SDValue Exp = extractF64Exponent(Hi, SL, DAG); const unsigned FractBits = 52; // Extract the sign bit. const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32); SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask); // Extend back to 64-bits. SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit}); SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); const SDValue FractMask = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64); SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32); SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT); SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT); SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0); SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1); return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2); } SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); assert(Op.getValueType() == MVT::f64); APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52"); SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64); SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); // TODO: Should this propagate fast-math-flags? 
SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src); APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51"); SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); } SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const { // FNEARBYINT and FRINT are the same, except in their handling of FP // exceptions. Those aren't really meaningful for us, and OpenCL only has // rint, so just treat them as equivalent. return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); } // XXX - May require not supporting f32 denormals? // Don't handle v2f16. The extra instructions to scalarize and repack around the // compare and vselect end up producing worse code than scalarizing the whole // operation. SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue X = Op.getOperand(0); EVT VT = Op.getValueType(); SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X); // TODO: Should this propagate fast-math-flags? SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T); SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff); const SDValue Zero = DAG.getConstantFP(0.0, SL, VT); const SDValue One = DAG.getConstantFP(1.0, SL, VT); const SDValue Half = DAG.getConstantFP(0.5, SL, VT); SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE); SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero); return DAG.getNode(ISD::FADD, SL, VT, T, Sel); } SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); // result = trunc(src); // if (src < 0.0 && src != result) // result += -1.0. SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); // TODO: Should this propagate fast-math-flags? 
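// Illustrative example: Src = -2.3 gives Trunc = -2.0; Src is negative and
// differs from Trunc, so Add is -1.0 and the result is -3.0 == floor(-2.3).
// For Src = 2.3 the conjunction is false, Add is 0.0, and Trunc is returned.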
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG, double Log2BaseInverted) const { EVT VT = Op.getValueType(); SDLoc SL(Op); SDValue Operand = Op.getOperand(0); SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Operand); SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT); return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand); } // exp2(M_LOG2E_F * f); SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc SL(Op); SDValue Src = Op.getOperand(0); const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT); SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags()); return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags()); } static bool isCtlzOpc(unsigned Opc) { return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF; } static bool isCttzOpc(unsigned Opc) { return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF; } SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode())); bool Ctlz = isCtlzOpc(Op.getOpcode()); unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32; bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF || Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF; if (Src.getValueType() == MVT::i32) { // (ctlz hi:lo) -> (umin (ffbh src), 32) // (cttz hi:lo) -> (umin (ffbl src), 32) // (ctlz_zero_undef src) -> (ffbh src) // (cttz_zero_undef src) -> (ffbl src) SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src); if (!ZeroUndef) { const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32); NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32); } return NewOpr; } SDValue Lo, Hi; std::tie(Lo, Hi) = split64BitValue(Src, DAG); SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo); SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi); // (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64) // (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64) // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32)) // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo)) unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT; const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32); if (Ctlz) OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32); else OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32); SDValue NewOpr; NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi); if (!ZeroUndef) { const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32); NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64); } return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr); } SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const { // The regular method converting a 64-bit integer to float roughly consists of // 2 steps: normalization and rounding. In fact, after normalization, the // conversion from a 64-bit integer to a float is essentially the same as the // one from a 32-bit integer. The only difference is that it has more // trailing bits to be rounded. To leverage the native 32-bit conversion, a // 64-bit integer could be preprocessed and fit into a 32-bit integer then // converted into the correct float number. 
The basic steps for the unsigned // conversion are illustrated in the following pseudo code: // // f32 uitofp(i64 u) { // i32 hi, lo = split(u); // // Only count the leading zeros in hi as we have native support of the // // conversion from i32 to f32. If hi is all 0s, the conversion is // // reduced to a 32-bit one automatically. // i32 shamt = clz(hi); // Return 32 if hi is all 0s. // u <<= shamt; // hi, lo = split(u); // hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo. // // convert it as a 32-bit integer and scale the result back. // return uitofp(hi) * 2^(32 - shamt); // } // // The signed one follows the same principle but uses 'ffbh_i32' to count its // sign bits instead. If 'ffbh_i32' is not available, its absolute value is // converted instead followed by negation based its sign bit. SDLoc SL(Op); SDValue Src = Op.getOperand(0); SDValue Lo, Hi; std::tie(Lo, Hi) = split64BitValue(Src, DAG); SDValue Sign; SDValue ShAmt; if (Signed && Subtarget->isGCN()) { // We also need to consider the sign bit in Lo if Hi has just sign bits, // i.e. Hi is 0 or -1. However, that only needs to take the MSB into // account. That is, the maximal shift is // - 32 if Lo and Hi have opposite signs; // - 33 if Lo and Hi have the same sign. // // Or, MaxShAmt = 33 + OppositeSign, where // // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is // - -1 if Lo and Hi have opposite signs; and // - 0 otherwise. // // All in all, ShAmt is calculated as // // umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1. // // or // // umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31). // // to reduce the critical path. SDValue OppositeSign = DAG.getNode( ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi), DAG.getConstant(31, SL, MVT::i32)); SDValue MaxShAmt = DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32), OppositeSign); // Count the leading sign bits. ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi); // Different from unsigned conversion, the shift should be one bit less to // preserve the sign bit. ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt, DAG.getConstant(1, SL, MVT::i32)); ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt); } else { if (Signed) { // Without 'ffbh_i32', only leading zeros could be counted. Take the // absolute value first. Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src, DAG.getConstant(63, SL, MVT::i64)); SDValue Abs = DAG.getNode(ISD::XOR, SL, MVT::i64, DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign); std::tie(Lo, Hi) = split64BitValue(Abs, DAG); } // Count the leading zeros. ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi); // The shift amount for signed integers is [0, 32]. } // Normalize the given 64-bit integer. SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt); // Split it again. std::tie(Lo, Hi) = split64BitValue(Norm, DAG); // Calculate the adjust bit for rounding. // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo) SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32, DAG.getConstant(1, SL, MVT::i32), Lo); // Get the 32-bit normalized integer. Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust); // Convert the normalized 32-bit integer into f32. unsigned Opc = (Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm); // Finally, need to scale back the converted floating number as the original // 64-bit integer is converted as a 32-bit one. 
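// Why the scale factor is 2^(32 - ShAmt): the normalization shift multiplied
// the integer by 2^ShAmt, and converting only the (adjusted) high word
// implicitly divided it by 2^32, so the conversion result must be multiplied
// by 2^32 / 2^ShAmt to recover the original magnitude.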
ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32), ShAmt); // On GCN, use LDEXP directly. if (Subtarget->isGCN()) return DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f32, FVal, ShAmt); // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent // part directly to emulate the multiplication of 2^ShAmt. That 8-bit // exponent is enough to avoid overflowing into the sign bit. SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt, DAG.getConstant(23, SL, MVT::i32)); SDValue IVal = DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp); if (Signed) { // Set the sign bit. Sign = DAG.getNode(ISD::SHL, SL, MVT::i32, DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign), DAG.getConstant(31, SL, MVT::i32)); IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign); } return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal); } SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); SDValue Lo, Hi; std::tie(Lo, Hi) = split64BitValue(Src, DAG); SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP, SL, MVT::f64, Hi); SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo); SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, DAG.getConstant(32, SL, MVT::i32)); // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); } SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { // TODO: Factor out code common with LowerSINT_TO_FP. EVT DestVT = Op.getValueType(); SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); if (SrcVT == MVT::i16) { if (DestVT == MVT::f16) return Op; SDLoc DL(Op); // Promote src to i32 SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src); return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext); } assert(SrcVT == MVT::i64 && "operation should be legal"); if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { SDLoc DL(Op); SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src); SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op)); SDValue FPRound = DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag); return FPRound; } if (DestVT == MVT::f32) return LowerINT_TO_FP32(Op, DAG, false); assert(DestVT == MVT::f64); return LowerINT_TO_FP64(Op, DAG, false); } SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT DestVT = Op.getValueType(); SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); if (SrcVT == MVT::i16) { if (DestVT == MVT::f16) return Op; SDLoc DL(Op); // Promote src to i32 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Src); return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext); } assert(SrcVT == MVT::i64 && "operation should be legal"); // TODO: Factor out code common with LowerUINT_TO_FP. 
if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { SDLoc DL(Op); SDValue Src = Op.getOperand(0); SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src); SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op)); SDValue FPRound = DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag); return FPRound; } if (DestVT == MVT::f32) return LowerINT_TO_FP32(Op, DAG, true); assert(DestVT == MVT::f64); return LowerINT_TO_FP64(Op, DAG, true); } SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); assert(SrcVT == MVT::f32 || SrcVT == MVT::f64); // The basic idea of converting a floating point number into a pair of 32-bit // integers is illustrated as follows: // // tf := trunc(val); // hif := floor(tf * 2^-32); // lof := tf - hif * 2^32; // lof is always positive due to floor. // hi := fptoi(hif); // lo := fptoi(lof); // SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src); SDValue Sign; if (Signed && SrcVT == MVT::f32) { // However, a 32-bit floating point number has only 23 bits mantissa and // it's not enough to hold all the significant bits of `lof` if val is // negative. To avoid the loss of precision, We need to take the absolute // value after truncating and flip the result back based on the original // signedness. Sign = DAG.getNode(ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc), DAG.getConstant(31, SL, MVT::i32)); Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc); } SDValue K0, K1; if (SrcVT == MVT::f64) { K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(/*2^-32*/ 0x3df0000000000000)), SL, SrcVT); K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(/*-2^32*/ 0xc1f0000000000000)), SL, SrcVT); } else { K0 = DAG.getConstantFP(BitsToFloat(UINT32_C(/*2^-32*/ 0x2f800000)), SL, SrcVT); K1 = DAG.getConstantFP(BitsToFloat(UINT32_C(/*-2^32*/ 0xcf800000)), SL, SrcVT); } // TODO: Should this propagate fast-math-flags? SDValue Mul = DAG.getNode(ISD::FMUL, SL, SrcVT, Trunc, K0); SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, SrcVT, Mul); SDValue Fma = DAG.getNode(ISD::FMA, SL, SrcVT, FloorMul, K1, Trunc); SDValue Hi = DAG.getNode((Signed && SrcVT == MVT::f64) ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL, MVT::i32, FloorMul); SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma); SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64, DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi})); if (Signed && SrcVT == MVT::f32) { assert(Sign); // Flip the result based on the signedness, which is either all 0s or 1s. Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64, DAG.getBuildVector(MVT::v2i32, SL, {Sign, Sign})); // r := xor(r, sign) - sign; Result = DAG.getNode(ISD::SUB, SL, MVT::i64, DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign); } return Result; } SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue N0 = Op.getOperand(0); // Convert to target node to get known bits if (N0.getValueType() == MVT::f32) return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0); if (getTargetMachine().Options.UnsafeFPMath) { // There is a generic expand for FP_TO_FP16 with unsafe fast math. return SDValue(); } assert(N0.getSimpleValueType() == MVT::f64); // f64 -> f16 conversion using round-to-nearest-even rounding mode. 
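// Bit-layout reminder for the expansion below: IEEE f64 has 1 sign bit, an
// 11-bit exponent biased by 1023 and a 52-bit mantissa; IEEE f16 has 1 sign
// bit, a 5-bit exponent biased by 15 and a 10-bit mantissa. The code rebiases
// the exponent, narrows the mantissa with round-to-nearest-even (collecting
// the discarded bits into a sticky bit), and special-cases denormal results,
// overflow to infinity, and NaN/Inf inputs.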
const unsigned ExpMask = 0x7ff; const unsigned ExpBiasf64 = 1023; const unsigned ExpBiasf16 = 15; SDValue Zero = DAG.getConstant(0, DL, MVT::i32); SDValue One = DAG.getConstant(1, DL, MVT::i32); SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0); SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U, DAG.getConstant(32, DL, MVT::i64)); UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32); U = DAG.getZExtOrTrunc(U, DL, MVT::i32); SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, DAG.getConstant(20, DL, MVT::i64)); E = DAG.getNode(ISD::AND, DL, MVT::i32, E, DAG.getConstant(ExpMask, DL, MVT::i32)); // Subtract the fp64 exponent bias (1023) to get the real exponent and // add the f16 bias (15) to get the biased exponent for the f16 format. E = DAG.getNode(ISD::ADD, DL, MVT::i32, E, DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32)); SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, DAG.getConstant(8, DL, MVT::i32)); M = DAG.getNode(ISD::AND, DL, MVT::i32, M, DAG.getConstant(0xffe, DL, MVT::i32)); SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH, DAG.getConstant(0x1ff, DL, MVT::i32)); MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U); SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ); M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set); // (M != 0 ? 0x0200 : 0) | 0x7c00; SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32, DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32), Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32)); // N = M | (E << 12); SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M, DAG.getNode(ISD::SHL, DL, MVT::i32, E, DAG.getConstant(12, DL, MVT::i32))); // B = clamp(1-E, 0, 13); SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32, One, E); SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero); B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B, DAG.getConstant(13, DL, MVT::i32)); SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M, DAG.getConstant(0x1000, DL, MVT::i32)); SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B); SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B); SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE); D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1); SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT); SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V, DAG.getConstant(0x7, DL, MVT::i32)); V = DAG.getNode(ISD::SRL, DL, MVT::i32, V, DAG.getConstant(2, DL, MVT::i32)); SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32), One, Zero, ISD::SETEQ); SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32), One, Zero, ISD::SETGT); V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1); V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1); V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32), DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT); V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32), I, V, ISD::SETEQ); // Extract the sign bit. 
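// The f64 sign bit sits in bit 31 of the high word; shifting right by 16 and
// masking with 0x8000 places it directly in the f16 sign position (bit 15).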
SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, DAG.getConstant(16, DL, MVT::i32)); Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign, DAG.getConstant(0x8000, DL, MVT::i32)); V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V); return DAG.getZExtOrTrunc(V, DL, Op.getValueType()); } SDValue AMDGPUTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(0); unsigned OpOpcode = Op.getOpcode(); EVT SrcVT = Src.getValueType(); EVT DestVT = Op.getValueType(); // Will be selected natively if (SrcVT == MVT::f16 && DestVT == MVT::i16) return Op; // Promote i16 to i32 if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { SDLoc DL(Op); SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToInt32); } if (SrcVT == MVT::f16 || (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) { SDLoc DL(Op); SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src); unsigned Ext = OpOpcode == ISD::FP_TO_SINT ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(Ext, DL, MVT::i64, FpToInt32); } if (DestVT == MVT::i64 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) return LowerFP_TO_INT64(Op, DAG, OpOpcode == ISD::FP_TO_SINT); return SDValue(); } SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { EVT ExtraVT = cast(Op.getOperand(1))->getVT(); MVT VT = Op.getSimpleValueType(); MVT ScalarVT = VT.getScalarType(); assert(VT.isVector()); SDValue Src = Op.getOperand(0); SDLoc DL(Op); // TODO: Don't scalarize on Evergreen? unsigned NElts = VT.getVectorNumElements(); SmallVector Args; DAG.ExtractVectorElements(Src, Args, 0, NElts); SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); for (unsigned I = 0; I < NElts; ++I) Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); return DAG.getBuildVector(VT, DL, Args); } //===----------------------------------------------------------------------===// // Custom DAG optimizations //===----------------------------------------------------------------------===// static bool isU24(SDValue Op, SelectionDAG &DAG) { return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24; } static bool isI24(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated // as unsigned 24-bit values. AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24; } static SDValue simplifyMul24(SDNode *Node24, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN; SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0); SDValue RHS = IsIntrin ? 
Node24->getOperand(2) : Node24->getOperand(1); unsigned NewOpcode = Node24->getOpcode(); if (IsIntrin) { unsigned IID = cast(Node24->getOperand(0))->getZExtValue(); switch (IID) { case Intrinsic::amdgcn_mul_i24: NewOpcode = AMDGPUISD::MUL_I24; break; case Intrinsic::amdgcn_mul_u24: NewOpcode = AMDGPUISD::MUL_U24; break; case Intrinsic::amdgcn_mulhi_i24: NewOpcode = AMDGPUISD::MULHI_I24; break; case Intrinsic::amdgcn_mulhi_u24: NewOpcode = AMDGPUISD::MULHI_U24; break; default: llvm_unreachable("Expected 24-bit mul intrinsic"); } } APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24); // First try to simplify using SimplifyMultipleUseDemandedBits which allows // the operands to have other uses, but will only perform simplifications that // involve bypassing some nodes for this user. SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG); SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG); if (DemandedLHS || DemandedRHS) return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(), DemandedLHS ? DemandedLHS : LHS, DemandedRHS ? DemandedRHS : RHS); // Now try SimplifyDemandedBits which can simplify the nodes used by our // operands if this node is the only user. if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI)) return SDValue(Node24, 0); if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI)) return SDValue(Node24, 0); return SDValue(); } template static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, uint32_t Width, const SDLoc &DL) { if (Width + Offset < 32) { uint32_t Shl = static_cast(Src0) << (32 - Offset - Width); IntTy Result = static_cast(Shl) >> (32 - Width); return DAG.getConstant(Result, DL, MVT::i32); } return DAG.getConstant(Src0 >> Offset, DL, MVT::i32); } static bool hasVolatileUser(SDNode *Val) { for (SDNode *U : Val->uses()) { if (MemSDNode *M = dyn_cast(U)) { if (M->isVolatile()) return true; } } return false; } bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { // i32 vectors are the canonical memory type. if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT)) return false; if (!VT.isByteSized()) return false; unsigned Size = VT.getStoreSize(); if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector()) return false; if (Size == 3 || (Size > 4 && (Size % 4 != 0))) return false; return true; } // Replace load of an illegal type with a store of a bitcast to a friendlier // type. SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (!DCI.isBeforeLegalize()) return SDValue(); LoadSDNode *LN = cast(N); if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN)) return SDValue(); SDLoc SL(N); SelectionDAG &DAG = DCI.DAG; EVT VT = LN->getMemoryVT(); unsigned Size = VT.getStoreSize(); Align Alignment = LN->getAlign(); if (Alignment < Size && isTypeLegal(VT)) { bool IsFast; unsigned AS = LN->getAddressSpace(); // Expand unaligned loads earlier than legalization. Due to visitation order // problems during legalization, the emitted instructions to pack and unpack // the bytes again are not eliminated in the case of an unaligned copy. 
if (!allowsMisalignedMemoryAccesses( VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) { if (VT.isVector()) return SplitVectorLoad(SDValue(LN, 0), DAG); SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG); return DAG.getMergeValues(Ops, SDLoc(N)); } if (!IsFast) return SDValue(); } if (!shouldCombineMemoryType(VT)) return SDValue(); EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); SDValue NewLoad = DAG.getLoad(NewVT, SL, LN->getChain(), LN->getBasePtr(), LN->getMemOperand()); SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad); DCI.CombineTo(N, BC, NewLoad.getValue(1)); return SDValue(N, 0); } // Replace store of an illegal type with a store of a bitcast to a friendlier // type. SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (!DCI.isBeforeLegalize()) return SDValue(); StoreSDNode *SN = cast(N); if (!SN->isSimple() || !ISD::isNormalStore(SN)) return SDValue(); EVT VT = SN->getMemoryVT(); unsigned Size = VT.getStoreSize(); SDLoc SL(N); SelectionDAG &DAG = DCI.DAG; Align Alignment = SN->getAlign(); if (Alignment < Size && isTypeLegal(VT)) { bool IsFast; unsigned AS = SN->getAddressSpace(); // Expand unaligned stores earlier than legalization. Due to visitation // order problems during legalization, the emitted instructions to pack and // unpack the bytes again are not eliminated in the case of an unaligned // copy. if (!allowsMisalignedMemoryAccesses( VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) { if (VT.isVector()) return SplitVectorStore(SDValue(SN, 0), DAG); return expandUnalignedStore(SN, DAG); } if (!IsFast) return SDValue(); } if (!shouldCombineMemoryType(VT)) return SDValue(); EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); SDValue Val = SN->getValue(); //DCI.AddToWorklist(Val.getNode()); bool OtherUses = !Val.hasOneUse(); SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val); if (OtherUses) { SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal); DAG.ReplaceAllUsesOfValueWith(Val, CastBack); } return DAG.getStore(SN->getChain(), SL, CastVal, SN->getBasePtr(), SN->getMemOperand()); } // FIXME: This should go in generic DAG combiner with an isTruncateFree check, // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU // issues. SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); // (vt2 (assertzext (truncate vt0:x), vt1)) -> // (vt2 (truncate (assertzext vt0:x, vt1))) if (N0.getOpcode() == ISD::TRUNCATE) { SDValue N1 = N->getOperand(1); EVT ExtVT = cast(N1)->getVT(); SDLoc SL(N); SDValue Src = N0.getOperand(0); EVT SrcVT = Src.getValueType(); if (SrcVT.bitsGE(ExtVT)) { SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1); return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg); } } return SDValue(); } SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( SDNode *N, DAGCombinerInfo &DCI) const { unsigned IID = cast(N->getOperand(0))->getZExtValue(); switch (IID) { case Intrinsic::amdgcn_mul_i24: case Intrinsic::amdgcn_mul_u24: case Intrinsic::amdgcn_mulhi_i24: case Intrinsic::amdgcn_mulhi_u24: return simplifyMul24(N, DCI); case Intrinsic::amdgcn_fract: case Intrinsic::amdgcn_rsq: case Intrinsic::amdgcn_rcp_legacy: case Intrinsic::amdgcn_rsq_legacy: case Intrinsic::amdgcn_rsq_clamp: case Intrinsic::amdgcn_ldexp: { // FIXME: This is probably wrong. 
If src is an sNaN, it won't be quieted SDValue Src = N->getOperand(1); return Src.isUndef() ? Src : SDValue(); } default: return SDValue(); } } /// Split the 64-bit value \p LHS into two 32-bit components, and perform the /// binary operation \p Opc to it with the corresponding constant operands. SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const { SelectionDAG &DAG = DCI.DAG; SDValue Lo, Hi; std::tie(Lo, Hi) = split64BitValue(LHS, DAG); SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32); SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32); SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS); SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS); // Re-visit the ands. It's possible we eliminated one of them and it could // simplify the vector. DCI.AddToWorklist(Lo.getNode()); DCI.AddToWorklist(Hi.getNode()); SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd}); return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); } SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const { EVT VT = N->getValueType(0); ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); if (!RHS) return SDValue(); SDValue LHS = N->getOperand(0); unsigned RHSVal = RHS->getZExtValue(); if (!RHSVal) return LHS; SDLoc SL(N); SelectionDAG &DAG = DCI.DAG; switch (LHS->getOpcode()) { default: break; case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::ANY_EXTEND: { SDValue X = LHS->getOperand(0); if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 && isOperationLegal(ISD::BUILD_VECTOR, MVT::v2i16)) { // Prefer build_vector as the canonical form if packed types are legal. // (shl ([asz]ext i16:x), 16 -> build_vector 0, x SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL, { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) }); return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec); } // shl (ext x) => zext (shl x), if shift does not overflow int if (VT != MVT::i64) break; KnownBits Known = DAG.computeKnownBits(X); unsigned LZ = Known.countMinLeadingZeros(); if (LZ < RHSVal) break; EVT XVT = X.getValueType(); SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); return DAG.getZExtOrTrunc(Shl, SL, VT); } } if (VT != MVT::i64) return SDValue(); // i64 (shl x, C) -> (build_pair 0, (shl x, C -32)) // On some subtargets, 64-bit shift is a quarter rate instruction. In the // common case, splitting this into a move and a 32-bit shift is faster and // the same code size. 
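// For example (illustrative): (shl i64:x, 40) becomes the pair
//   lo32 = 0, hi32 = shl (trunc x to i32), 8
// since a shift by 32 or more clears the low word, and every bit that can
// still reach the high word comes from the low 32 bits of x.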
if (RHSVal < 32) return SDValue(); SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32); SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt); const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift}); return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); } SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (N->getValueType(0) != MVT::i64) return SDValue(); const ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); if (!RHS) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc SL(N); unsigned RHSVal = RHS->getZExtValue(); // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31) if (RHSVal == 32) { SDValue Hi = getHiHalf64(N->getOperand(0), DAG); SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi, DAG.getConstant(31, SL, MVT::i32)); SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift}); return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec); } // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31) if (RHSVal == 63) { SDValue Hi = getHiHalf64(N->getOperand(0), DAG); SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi, DAG.getConstant(31, SL, MVT::i32)); SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift}); return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec); } return SDValue(); } SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const { auto *RHS = dyn_cast(N->getOperand(1)); if (!RHS) return SDValue(); EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); unsigned ShiftAmt = RHS->getZExtValue(); SelectionDAG &DAG = DCI.DAG; SDLoc SL(N); // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1) // this improves the ability to match BFE patterns in isel. if (LHS.getOpcode() == ISD::AND) { if (auto *Mask = dyn_cast(LHS.getOperand(1))) { unsigned MaskIdx, MaskLen; if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) && MaskIdx == ShiftAmt) { return DAG.getNode( ISD::AND, SL, VT, DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)), DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1))); } } } if (VT != MVT::i64) return SDValue(); if (ShiftAmt < 32) return SDValue(); // srl i64:x, C for C >= 32 // => // build_pair (srl hi_32(x), C - 32), 0 SDValue Zero = DAG.getConstant(0, SL, MVT::i32); SDValue Hi = getHiHalf64(LHS, DAG); SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32); SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst); SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero}); return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair); } SDValue AMDGPUTargetLowering::performTruncateCombine( SDNode *N, DAGCombinerInfo &DCI) const { SDLoc SL(N); SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); SDValue Src = N->getOperand(0); // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x) if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue Vec = Src.getOperand(0); if (Vec.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt0 = Vec.getOperand(0); EVT EltVT = Elt0.getValueType(); if (VT.getFixedSizeInBits() <= EltVT.getFixedSizeInBits()) { if (EltVT.isFloatingPoint()) { Elt0 = DAG.getNode(ISD::BITCAST, SL, EltVT.changeTypeToInteger(), Elt0); } return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0); } } } // Equivalent of above for accessing the high element of a vector as an // integer operation. 
// trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y) if (Src.getOpcode() == ISD::SRL && !VT.isVector()) { if (auto K = isConstOrConstSplat(Src.getOperand(1))) { if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) { SDValue BV = stripBitcast(Src.getOperand(0)); if (BV.getOpcode() == ISD::BUILD_VECTOR && BV.getValueType().getVectorNumElements() == 2) { SDValue SrcElt = BV.getOperand(1); EVT SrcEltVT = SrcElt.getValueType(); if (SrcEltVT.isFloatingPoint()) { SrcElt = DAG.getNode(ISD::BITCAST, SL, SrcEltVT.changeTypeToInteger(), SrcElt); } return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt); } } } } // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit. // // i16 (trunc (srl i64:x, K)), K <= 16 -> // i16 (trunc (srl (i32 (trunc x), K))) if (VT.getScalarSizeInBits() < 32) { EVT SrcVT = Src.getValueType(); if (SrcVT.getScalarSizeInBits() > 32 && (Src.getOpcode() == ISD::SRL || Src.getOpcode() == ISD::SRA || Src.getOpcode() == ISD::SHL)) { SDValue Amt = Src.getOperand(1); KnownBits Known = DAG.computeKnownBits(Amt); unsigned Size = VT.getScalarSizeInBits(); if ((Known.isConstant() && Known.getConstant().ule(Size)) || (Known.countMaxActiveBits() <= Log2_32(Size))) { EVT MidVT = VT.isVector() ? EVT::getVectorVT(*DAG.getContext(), MVT::i32, VT.getVectorNumElements()) : MVT::i32; EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout()); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT, Src.getOperand(0)); DCI.AddToWorklist(Trunc.getNode()); if (Amt.getValueType() != NewShiftVT) { Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT); DCI.AddToWorklist(Amt.getNode()); } SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT, Trunc, Amt); return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift); } } } return SDValue(); } // We need to specifically handle i64 mul here to avoid unnecessary conversion // instructions. If we only match on the legalized i64 mul expansion, // SimplifyDemandedBits will be unable to remove them because there will be // multiple uses due to the separate mul + mulh[su]. static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, SDValue N0, SDValue N1, unsigned Size, bool Signed) { if (Size <= 32) { unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1); } unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24; SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1); SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1); return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi); } SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const { EVT VT = N->getValueType(0); // Don't generate 24-bit multiplies on values that are in SGPRs, since // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs // unnecessarily). isDivergent() is used as an approximation of whether the // value is in an SGPR. if (!N->isDivergent()) return SDValue(); unsigned Size = VT.getSizeInBits(); if (VT.isVector() || Size > 64) return SDValue(); // There are i16 integer mul/mad. if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16)) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // SimplifyDemandedBits has the annoying habit of turning useful zero_extends // in the source into any_extends if the result of the mul is truncated. 
Since // we can assume the high bits are whatever we want, use the underlying value // to avoid the unknown high bits from interfering. if (N0.getOpcode() == ISD::ANY_EXTEND) N0 = N0.getOperand(0); if (N1.getOpcode() == ISD::ANY_EXTEND) N1 = N1.getOperand(0); SDValue Mul; if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); Mul = getMul24(DAG, DL, N0, N1, Size, false); } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); Mul = getMul24(DAG, DL, N0, N1, Size, true); } else { return SDValue(); } // We need to use sext even for MUL_U24, because MUL_U24 is used // for signed multiply of 8 and 16-bit types. return DAG.getSExtOrTrunc(Mul, DL, VT); } SDValue AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (N->getValueType(0) != MVT::i32) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // SimplifyDemandedBits has the annoying habit of turning useful zero_extends // in the source into any_extends if the result of the mul is truncated. Since // we can assume the high bits are whatever we want, use the underlying value // to avoid the unknown high bits from interfering. if (N0.getOpcode() == ISD::ANY_EXTEND) N0 = N0.getOperand(0); if (N1.getOpcode() == ISD::ANY_EXTEND) N1 = N1.getOperand(0); // Try to use two fast 24-bit multiplies (one for each half of the result) // instead of one slow extending multiply. unsigned LoOpcode, HiOpcode; if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); LoOpcode = AMDGPUISD::MUL_U24; HiOpcode = AMDGPUISD::MULHI_U24; } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); LoOpcode = AMDGPUISD::MUL_I24; HiOpcode = AMDGPUISD::MULHI_I24; } else { return SDValue(); } SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1); SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1); DCI.CombineTo(N, Lo, Hi); return SDValue(N, 0); } SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const { EVT VT = N->getValueType(0); if (!Subtarget->hasMulI24() || VT.isVector()) return SDValue(); // Don't generate 24-bit multiplies on values that are in SGPRs, since // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs // unnecessarily). isDivergent() is used as an approximation of whether the // value is in an SGPR. 
// This doesn't apply if no s_mul_hi is available (since we'll end up with a // valu op anyway) if (Subtarget->hasSMulHi() && !N->isDivergent()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (!isI24(N0, DAG) || !isI24(N1, DAG)) return SDValue(); N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1); DCI.AddToWorklist(Mulhi.getNode()); return DAG.getSExtOrTrunc(Mulhi, DL, VT); } SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const { EVT VT = N->getValueType(0); if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32) return SDValue(); // Don't generate 24-bit multiplies on values that are in SGPRs, since // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs // unnecessarily). isDivergent() is used as an approximation of whether the // value is in an SGPR. // This doesn't apply if no s_mul_hi is available (since we'll end up with a // valu op anyway) if (Subtarget->hasSMulHi() && !N->isDivergent()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (!isU24(N0, DAG) || !isU24(N1, DAG)) return SDValue(); N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1); DCI.AddToWorklist(Mulhi.getNode()); return DAG.getZExtOrTrunc(Mulhi, DL, VT); } static bool isNegativeOne(SDValue Val) { if (ConstantSDNode *C = dyn_cast(Val)) return C->isAllOnes(); return false; } SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const { EVT VT = Op.getValueType(); EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT); if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() && LegalVT != MVT::i16)) return SDValue(); if (VT != MVT::i32) Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op); SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op); if (VT != MVT::i32) FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX); return FFBX; } // The native instructions return -1 on 0 input. Optimize out a select that // produces -1 on 0. // // TODO: If zero is not undef, we could also do this if the output is compared // against the bitwidth. // // TODO: Should probably combine against FFBH_U32 instead of ctlz directly. SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, SDValue RHS, DAGCombinerInfo &DCI) const { ConstantSDNode *CmpRhs = dyn_cast(Cond.getOperand(1)); if (!CmpRhs || !CmpRhs->isZero()) return SDValue(); SelectionDAG &DAG = DCI.DAG; ISD::CondCode CCOpcode = cast(Cond.getOperand(2))->get(); SDValue CmpLHS = Cond.getOperand(0); // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x if (CCOpcode == ISD::SETEQ && (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) && RHS.getOperand(0) == CmpLHS && isNegativeOne(LHS)) { unsigned Opc = isCttzOpc(RHS.getOpcode()) ? 
AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32; return getFFBX_U32(DAG, CmpLHS, SL, Opc); } // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x if (CCOpcode == ISD::SETNE && (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) && LHS.getOperand(0) == CmpLHS && isNegativeOne(RHS)) { unsigned Opc = isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32; return getFFBX_U32(DAG, CmpLHS, SL, Opc); } return SDValue(); } static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, unsigned Op, const SDLoc &SL, SDValue Cond, SDValue N1, SDValue N2) { SelectionDAG &DAG = DCI.DAG; EVT VT = N1.getValueType(); SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond, N1.getOperand(0), N2.getOperand(0)); DCI.AddToWorklist(NewSelect.getNode()); return DAG.getNode(Op, SL, VT, NewSelect); } // Pull a free FP operation out of a select so it may fold into uses. // // select c, (fneg x), (fneg y) -> fneg (select c, x, y) // select c, (fneg x), k -> fneg (select c, x, (fneg k)) // // select c, (fabs x), (fabs y) -> fabs (select c, x, y) // select c, (fabs x), +k -> fabs (select c, x, k) static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, SDValue N) { SelectionDAG &DAG = DCI.DAG; SDValue Cond = N.getOperand(0); SDValue LHS = N.getOperand(1); SDValue RHS = N.getOperand(2); EVT VT = N.getValueType(); if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) || (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) { return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS, RHS); } bool Inv = false; if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) { std::swap(LHS, RHS); Inv = true; } // TODO: Support vector constants. ConstantFPSDNode *CRHS = dyn_cast(RHS); if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) { SDLoc SL(N); // If one side is an fneg/fabs and the other is a constant, we can push the // fneg/fabs down. If it's an fabs, the constant needs to be non-negative. SDValue NewLHS = LHS.getOperand(0); SDValue NewRHS = RHS; // Careful: if the neg can be folded up, don't try to pull it back down. bool ShouldFoldNeg = true; if (NewLHS.hasOneUse()) { unsigned Opc = NewLHS.getOpcode(); if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc)) ShouldFoldNeg = false; if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL) ShouldFoldNeg = false; } if (ShouldFoldNeg) { if (LHS.getOpcode() == ISD::FNEG) NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); else if (CRHS->isNegative()) return SDValue(); if (Inv) std::swap(NewLHS, NewRHS); SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond, NewLHS, NewRHS); DCI.AddToWorklist(NewSelect.getNode()); return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect); } } return SDValue(); } SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0))) return Folded; SDValue Cond = N->getOperand(0); if (Cond.getOpcode() != ISD::SETCC) return SDValue(); EVT VT = N->getValueType(0); SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); SDValue CC = Cond.getOperand(2); SDValue True = N->getOperand(1); SDValue False = N->getOperand(2); if (Cond.hasOneUse()) { // TODO: Look for multiple select uses. SelectionDAG &DAG = DCI.DAG; if (DAG.isConstantValueOfAnyType(True) && !DAG.isConstantValueOfAnyType(False)) { // Swap cmp + select pair to move constant to false input. 
// This will allow using VOPC cndmasks more often. // select (setcc x, y), k, x -> select (setccinv x, y), x, k SDLoc SL(N); ISD::CondCode NewCC = getSetCCInverse(cast(CC)->get(), LHS.getValueType()); SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC); return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True); } if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) { SDValue MinMax = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); // Revisit this node so we can catch min3/max3/med3 patterns. //DCI.AddToWorklist(MinMax.getNode()); return MinMax; } } // There's no reason to not do this if the condition has other uses. return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI); } static bool isInv2Pi(const APFloat &APF) { static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118)); static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983)); static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882)); return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) || APF.bitwiseIsEqual(KF64); } // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an // additional cost to negate them. bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const { if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) { if (C->isZero() && !C->isNegative()) return true; if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF())) return true; } return false; } static unsigned inverseMinMax(unsigned Opc) { switch (Opc) { case ISD::FMAXNUM: return ISD::FMINNUM; case ISD::FMINNUM: return ISD::FMAXNUM; case ISD::FMAXNUM_IEEE: return ISD::FMINNUM_IEEE; case ISD::FMINNUM_IEEE: return ISD::FMAXNUM_IEEE; case AMDGPUISD::FMAX_LEGACY: return AMDGPUISD::FMIN_LEGACY; case AMDGPUISD::FMIN_LEGACY: return AMDGPUISD::FMAX_LEGACY; default: llvm_unreachable("invalid min/max opcode"); } } SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); unsigned Opc = N0.getOpcode(); // If the input has multiple uses and we can either fold the negate down, or // the other uses cannot, give up. This both prevents unprofitable // transformations and infinite loops: we won't repeatedly try to fold around // a negate that has no 'good' form. if (N0.hasOneUse()) { // This may be able to fold into the source, but at a code size cost. Don't // fold if the fold into the user is free. if (allUsesHaveSourceMods(N, 0)) return SDValue(); } else { if (fnegFoldsIntoOp(Opc) && (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode()))) return SDValue(); } SDLoc SL(N); switch (Opc) { case ISD::FADD: { if (!mayIgnoreSignedZero(N0)) return SDValue(); // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) SDValue LHS = N0.getOperand(0); SDValue RHS = N0.getOperand(1); if (LHS.getOpcode() != ISD::FNEG) LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); else LHS = LHS.getOperand(0); if (RHS.getOpcode() != ISD::FNEG) RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); else RHS = RHS.getOperand(0); SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags()); if (Res.getOpcode() != ISD::FADD) return SDValue(); // Op got folded away. 
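// If the original fadd has additional users, they still need the un-negated
// value, which is now available as (fneg Res); rewrite them so the old node
// can be deleted.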
if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; } case ISD::FMUL: case AMDGPUISD::FMUL_LEGACY: { // (fneg (fmul x, y)) -> (fmul x, (fneg y)) // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y)) SDValue LHS = N0.getOperand(0); SDValue RHS = N0.getOperand(1); if (LHS.getOpcode() == ISD::FNEG) LHS = LHS.getOperand(0); else if (RHS.getOpcode() == ISD::FNEG) RHS = RHS.getOperand(0); else RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags()); if (Res.getOpcode() != Opc) return SDValue(); // Op got folded away. if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; } case ISD::FMA: case ISD::FMAD: { // TODO: handle llvm.amdgcn.fma.legacy if (!mayIgnoreSignedZero(N0)) return SDValue(); // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z)) SDValue LHS = N0.getOperand(0); SDValue MHS = N0.getOperand(1); SDValue RHS = N0.getOperand(2); if (LHS.getOpcode() == ISD::FNEG) LHS = LHS.getOperand(0); else if (MHS.getOpcode() == ISD::FNEG) MHS = MHS.getOperand(0); else MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS); if (RHS.getOpcode() != ISD::FNEG) RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); else RHS = RHS.getOperand(0); SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS); if (Res.getOpcode() != Opc) return SDValue(); // Op got folded away. if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; } case ISD::FMAXNUM: case ISD::FMINNUM: case ISD::FMAXNUM_IEEE: case ISD::FMINNUM_IEEE: case AMDGPUISD::FMAX_LEGACY: case AMDGPUISD::FMIN_LEGACY: { // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y) // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y) // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y) // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y) SDValue LHS = N0.getOperand(0); SDValue RHS = N0.getOperand(1); // 0 doesn't have a negated inline immediate. // TODO: This constant check should be generalized to other operations. if (isConstantCostlierToNegate(RHS)) return SDValue(); SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); unsigned Opposite = inverseMinMax(Opc); SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags()); if (Res.getOpcode() != Opposite) return SDValue(); // Op got folded away. if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; } case AMDGPUISD::FMED3: { SDValue Ops[3]; for (unsigned I = 0; I < 3; ++I) Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags()); SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags()); if (Res.getOpcode() != AMDGPUISD::FMED3) return SDValue(); // Op got folded away. if (!N0.hasOneUse()) { SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res); DAG.ReplaceAllUsesWith(N0, Neg); for (SDNode *U : Neg->uses()) DCI.AddToWorklist(U); } return Res; } case ISD::FP_EXTEND: case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: // XXX - Should fround be handled? 
case ISD::FSIN: case ISD::FCANONICALIZE: case AMDGPUISD::RCP: case AMDGPUISD::RCP_LEGACY: case AMDGPUISD::RCP_IFLAG: case AMDGPUISD::SIN_HW: { SDValue CvtSrc = N0.getOperand(0); if (CvtSrc.getOpcode() == ISD::FNEG) { // (fneg (fp_extend (fneg x))) -> (fp_extend x) // (fneg (rcp (fneg x))) -> (rcp x) return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0)); } if (!N0.hasOneUse()) return SDValue(); // (fneg (fp_extend x)) -> (fp_extend (fneg x)) // (fneg (rcp x)) -> (rcp (fneg x)) SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags()); } case ISD::FP_ROUND: { SDValue CvtSrc = N0.getOperand(0); if (CvtSrc.getOpcode() == ISD::FNEG) { // (fneg (fp_round (fneg x))) -> (fp_round x) return DAG.getNode(ISD::FP_ROUND, SL, VT, CvtSrc.getOperand(0), N0.getOperand(1)); } if (!N0.hasOneUse()) return SDValue(); // (fneg (fp_round x)) -> (fp_round (fneg x)) SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1)); } case ISD::FP16_TO_FP: { // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal // f16, but legalization of f16 fneg ends up pulling it out of the source. // Put the fneg back as a legal source operation that can be matched later. SDLoc SL(N); SDValue Src = N0.getOperand(0); EVT SrcVT = Src.getValueType(); // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000) SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src, DAG.getConstant(0x8000, SL, SrcVT)); return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg); } default: return SDValue(); } } SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); if (!N0.hasOneUse()) return SDValue(); switch (N0.getOpcode()) { case ISD::FP16_TO_FP: { assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal"); SDLoc SL(N); SDValue Src = N0.getOperand(0); EVT SrcVT = Src.getValueType(); // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff) SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src, DAG.getConstant(0x7fff, SL, SrcVT)); return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs); } default: return SDValue(); } } SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const { const auto *CFP = dyn_cast(N->getOperand(0)); if (!CFP) return SDValue(); // XXX - Should this flush denormals? const APFloat &Val = CFP->getValueAPF(); APFloat One(Val.getSemantics(), "1.0"); return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0)); } SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); switch(N->getOpcode()) { default: break; case ISD::BITCAST: { EVT DestVT = N->getValueType(0); // Push casts through vector builds. This helps avoid emitting a large // number of copies when materializing floating point vector constants. 
// // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) => // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y)) if (DestVT.isVector()) { SDValue Src = N->getOperand(0); if (Src.getOpcode() == ISD::BUILD_VECTOR) { EVT SrcVT = Src.getValueType(); unsigned NElts = DestVT.getVectorNumElements(); if (SrcVT.getVectorNumElements() == NElts) { EVT DestEltVT = DestVT.getVectorElementType(); SmallVector CastedElts; SDLoc SL(N); for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) { SDValue Elt = Src.getOperand(I); CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt)); } return DAG.getBuildVector(DestVT, SL, CastedElts); } } } if (DestVT.getSizeInBits() != 64 || !DestVT.isVector()) break; // Fold bitcasts of constants. // // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k) // TODO: Generalize and move to DAGCombiner SDValue Src = N->getOperand(0); if (ConstantSDNode *C = dyn_cast(Src)) { SDLoc SL(N); uint64_t CVal = C->getZExtValue(); SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, DAG.getConstant(Lo_32(CVal), SL, MVT::i32), DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); return DAG.getNode(ISD::BITCAST, SL, DestVT, BV); } if (ConstantFPSDNode *C = dyn_cast(Src)) { const APInt &Val = C->getValueAPF().bitcastToAPInt(); SDLoc SL(N); uint64_t CVal = Val.getZExtValue(); SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, DAG.getConstant(Lo_32(CVal), SL, MVT::i32), DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec); } break; } case ISD::SHL: { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) break; return performShlCombine(N, DCI); } case ISD::SRL: { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) break; return performSrlCombine(N, DCI); } case ISD::SRA: { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) break; return performSraCombine(N, DCI); } case ISD::TRUNCATE: return performTruncateCombine(N, DCI); case ISD::MUL: return performMulCombine(N, DCI); case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: return performMulLoHiCombine(N, DCI); case ISD::MULHS: return performMulhsCombine(N, DCI); case ISD::MULHU: return performMulhuCombine(N, DCI); case AMDGPUISD::MUL_I24: case AMDGPUISD::MUL_U24: case AMDGPUISD::MULHI_I24: case AMDGPUISD::MULHI_U24: return simplifyMul24(N, DCI); case ISD::SELECT: return performSelectCombine(N, DCI); case ISD::FNEG: return performFNegCombine(N, DCI); case ISD::FABS: return performFAbsCombine(N, DCI); case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && "Vector handling of BFE not implemented"); ConstantSDNode *Width = dyn_cast(N->getOperand(2)); if (!Width) break; uint32_t WidthVal = Width->getZExtValue() & 0x1f; if (WidthVal == 0) return DAG.getConstant(0, DL, MVT::i32); ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); if (!Offset) break; SDValue BitsFrom = N->getOperand(0); uint32_t OffsetVal = Offset->getZExtValue() & 0x1f; bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32; if (OffsetVal == 0) { // This is already sign / zero extended, so try to fold away extra BFEs. unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal); unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom); if (OpSignBits >= SignBits) return BitsFrom; EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal); if (Signed) { // This is a sign_extend_inreg. Replace it to take advantage of existing // DAG Combines. If not eliminated, we will match back to BFE during // selection. 
// TODO: The sext_inreg of extended types ends, although we can could // handle them in a single BFE. return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom, DAG.getValueType(SmallVT)); } return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT); } if (ConstantSDNode *CVal = dyn_cast(BitsFrom)) { if (Signed) { return constantFoldBFE(DAG, CVal->getSExtValue(), OffsetVal, WidthVal, DL); } return constantFoldBFE(DAG, CVal->getZExtValue(), OffsetVal, WidthVal, DL); } if ((OffsetVal + WidthVal) >= 32 && !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) { SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32); return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, BitsFrom, ShiftVal); } if (BitsFrom.hasOneUse()) { APInt Demanded = APInt::getBitsSet(32, OffsetVal, OffsetVal + WidthVal); KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } } break; } case ISD::LOAD: return performLoadCombine(N, DCI); case ISD::STORE: return performStoreCombine(N, DCI); case AMDGPUISD::RCP: case AMDGPUISD::RCP_IFLAG: return performRcpCombine(N, DCI); case ISD::AssertZext: case ISD::AssertSext: return performAssertSZExtCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return performIntrinsicWOChainCombine(N, DCI); } return SDValue(); } //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT, const SDLoc &SL, bool RawReg) const { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); Register VReg; if (!MRI.isLiveIn(Reg)) { VReg = MRI.createVirtualRegister(RC); MRI.addLiveIn(Reg, VReg); } else { VReg = MRI.getLiveInVirtReg(Reg); } if (RawReg) return DAG.getRegister(VReg, VT); return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT); } // This may be called multiple times, and nothing prevents creating multiple // objects at the same offset. See if we already defined this object. 
static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size, int64_t Offset) { for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { if (MFI.getObjectOffset(I) == Offset) { assert(MFI.getObjectSize(I) == Size); return I; } } return MFI.CreateFixedObject(Size, Offset, true); } SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG, EVT VT, const SDLoc &SL, int64_t Offset) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset); auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset); SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32); return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4), MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); } SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const { MachineFunction &MF = DAG.getMachineFunction(); MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset); const SIMachineFunctionInfo *Info = MF.getInfo(); SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32); // Stores to the argument stack area are relative to the stack pointer. SDValue SP = DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32); Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr); SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4), MachineMemOperand::MODereferenceable); return Store; } SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const { assert(Arg && "Attempting to load missing argument"); SDValue V = Arg.isRegister() ? CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) : loadStackInputValue(DAG, VT, SL, Arg.getStackOffset()); if (!Arg.isMasked()) return V; unsigned Mask = Arg.getMask(); unsigned Shift = countTrailingZeros(Mask); V = DAG.getNode(ISD::SRL, SL, VT, V, DAG.getShiftAmountConstant(Shift, VT, SL)); return DAG.getNode(ISD::AND, SL, VT, V, DAG.getConstant(Mask >> Shift, SL, VT)); } uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( const MachineFunction &MF, const ImplicitParameter Param) const { const AMDGPUMachineFunction *MFI = MF.getInfo(); const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction()); unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction()); const Align Alignment = ST.getAlignmentForImplicitArgPtr(); uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) + ExplicitArgOffset; switch (Param) { case FIRST_IMPLICIT: return ArgOffset; case PRIVATE_BASE: return ArgOffset + AMDGPU::ImplicitArg::PRIVATE_BASE_OFFSET; case SHARED_BASE: return ArgOffset + AMDGPU::ImplicitArg::SHARED_BASE_OFFSET; case QUEUE_PTR: return ArgOffset + AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET; } llvm_unreachable("unexpected implicit parameter type"); } #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((AMDGPUISD::NodeType)Opcode) { case AMDGPUISD::FIRST_NUMBER: break; // AMDIL DAG nodes NODE_NAME_CASE(UMUL); NODE_NAME_CASE(BRANCH_COND); // AMDGPU DAG nodes NODE_NAME_CASE(IF) NODE_NAME_CASE(ELSE) NODE_NAME_CASE(LOOP) NODE_NAME_CASE(CALL) NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(TRAP) NODE_NAME_CASE(RET_FLAG) NODE_NAME_CASE(RETURN_TO_EPILOG) NODE_NAME_CASE(ENDPGM) NODE_NAME_CASE(DWORDADDR) NODE_NAME_CASE(FRACT) 
NODE_NAME_CASE(SETCC) NODE_NAME_CASE(SETREG) NODE_NAME_CASE(DENORM_MODE) NODE_NAME_CASE(FMA_W_CHAIN) NODE_NAME_CASE(FMUL_W_CHAIN) NODE_NAME_CASE(CLAMP) NODE_NAME_CASE(COS_HW) NODE_NAME_CASE(SIN_HW) NODE_NAME_CASE(FMAX_LEGACY) NODE_NAME_CASE(FMIN_LEGACY) NODE_NAME_CASE(FMAX3) NODE_NAME_CASE(SMAX3) NODE_NAME_CASE(UMAX3) NODE_NAME_CASE(FMIN3) NODE_NAME_CASE(SMIN3) NODE_NAME_CASE(UMIN3) NODE_NAME_CASE(FMED3) NODE_NAME_CASE(SMED3) NODE_NAME_CASE(UMED3) NODE_NAME_CASE(FDOT2) NODE_NAME_CASE(URECIP) NODE_NAME_CASE(DIV_SCALE) NODE_NAME_CASE(DIV_FMAS) NODE_NAME_CASE(DIV_FIXUP) NODE_NAME_CASE(FMAD_FTZ) NODE_NAME_CASE(RCP) NODE_NAME_CASE(RSQ) NODE_NAME_CASE(RCP_LEGACY) NODE_NAME_CASE(RCP_IFLAG) NODE_NAME_CASE(FMUL_LEGACY) NODE_NAME_CASE(RSQ_CLAMP) NODE_NAME_CASE(LDEXP) NODE_NAME_CASE(FP_CLASS) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(CARRY) NODE_NAME_CASE(BORROW) NODE_NAME_CASE(BFE_U32) NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) NODE_NAME_CASE(FFBH_U32) NODE_NAME_CASE(FFBH_I32) NODE_NAME_CASE(FFBL_B32) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MULHI_U24) NODE_NAME_CASE(MULHI_I24) NODE_NAME_CASE(MAD_U24) NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(MAD_I64_I32) NODE_NAME_CASE(MAD_U64_U32) NODE_NAME_CASE(PERM) NODE_NAME_CASE(TEXTURE_FETCH) NODE_NAME_CASE(R600_EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) NODE_NAME_CASE(REGISTER_STORE) NODE_NAME_CASE(SAMPLE) NODE_NAME_CASE(SAMPLEB) NODE_NAME_CASE(SAMPLED) NODE_NAME_CASE(SAMPLEL) NODE_NAME_CASE(CVT_F32_UBYTE0) NODE_NAME_CASE(CVT_F32_UBYTE1) NODE_NAME_CASE(CVT_F32_UBYTE2) NODE_NAME_CASE(CVT_F32_UBYTE3) NODE_NAME_CASE(CVT_PKRTZ_F16_F32) NODE_NAME_CASE(CVT_PKNORM_I16_F32) NODE_NAME_CASE(CVT_PKNORM_U16_F32) NODE_NAME_CASE(CVT_PK_I16_I32) NODE_NAME_CASE(CVT_PK_U16_U32) NODE_NAME_CASE(FP_TO_FP16) NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) NODE_NAME_CASE(LDS) NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD) NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD) NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(LOAD_D16_HI) NODE_NAME_CASE(LOAD_D16_LO) NODE_NAME_CASE(LOAD_D16_HI_I8) NODE_NAME_CASE(LOAD_D16_HI_U8) NODE_NAME_CASE(LOAD_D16_LO_I8) NODE_NAME_CASE(LOAD_D16_LO_U8) NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(LOAD_CONSTANT) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16) NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) NODE_NAME_CASE(DS_ORDERED_COUNT) NODE_NAME_CASE(ATOMIC_CMP_SWAP) NODE_NAME_CASE(ATOMIC_INC) NODE_NAME_CASE(ATOMIC_DEC) NODE_NAME_CASE(ATOMIC_LOAD_FMIN) NODE_NAME_CASE(ATOMIC_LOAD_FMAX) NODE_NAME_CASE(BUFFER_LOAD) NODE_NAME_CASE(BUFFER_LOAD_UBYTE) NODE_NAME_CASE(BUFFER_LOAD_USHORT) NODE_NAME_CASE(BUFFER_LOAD_BYTE) NODE_NAME_CASE(BUFFER_LOAD_SHORT) NODE_NAME_CASE(BUFFER_LOAD_FORMAT) NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16) NODE_NAME_CASE(SBUFFER_LOAD) NODE_NAME_CASE(BUFFER_STORE) NODE_NAME_CASE(BUFFER_STORE_BYTE) NODE_NAME_CASE(BUFFER_STORE_SHORT) NODE_NAME_CASE(BUFFER_STORE_FORMAT) NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16) NODE_NAME_CASE(BUFFER_ATOMIC_SWAP) NODE_NAME_CASE(BUFFER_ATOMIC_ADD) NODE_NAME_CASE(BUFFER_ATOMIC_SUB) NODE_NAME_CASE(BUFFER_ATOMIC_SMIN) NODE_NAME_CASE(BUFFER_ATOMIC_UMIN) NODE_NAME_CASE(BUFFER_ATOMIC_SMAX) NODE_NAME_CASE(BUFFER_ATOMIC_UMAX) NODE_NAME_CASE(BUFFER_ATOMIC_AND) NODE_NAME_CASE(BUFFER_ATOMIC_OR) NODE_NAME_CASE(BUFFER_ATOMIC_XOR) NODE_NAME_CASE(BUFFER_ATOMIC_INC) NODE_NAME_CASE(BUFFER_ATOMIC_DEC) NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) 
NODE_NAME_CASE(BUFFER_ATOMIC_CSUB) NODE_NAME_CASE(BUFFER_ATOMIC_FADD) NODE_NAME_CASE(BUFFER_ATOMIC_FMIN) NODE_NAME_CASE(BUFFER_ATOMIC_FMAX) case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; } return nullptr; } SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const { EVT VT = Operand.getValueType(); if (VT == MVT::f32) { RefinementSteps = 0; return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand); } // TODO: There is also f64 rsq instruction, but the documentation is less // clear on its precision. return SDValue(); } SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const { EVT VT = Operand.getValueType(); if (VT == MVT::f32) { // Reciprocal, < 1 ulp error. // // This reciprocal approximation converges to < 0.5 ulp error with one // newton rhapson performed with two fused multiple adds (FMAs). RefinementSteps = 0; return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand); } // TODO: There is also f64 rcp instruction, but the documentation is less // clear on its precision. return SDValue(); } static unsigned workitemIntrinsicDim(unsigned ID) { switch (ID) { case Intrinsic::amdgcn_workitem_id_x: return 0; case Intrinsic::amdgcn_workitem_id_y: return 1; case Intrinsic::amdgcn_workitem_id_z: return 2; default: llvm_unreachable("not a workitem intrinsic"); } } void AMDGPUTargetLowering::computeKnownBitsForTargetNode( const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { Known.resetAll(); // Don't know anything. unsigned Opc = Op.getOpcode(); switch (Opc) { default: break; case AMDGPUISD::CARRY: case AMDGPUISD::BORROW: { Known.Zero = APInt::getHighBitsSet(32, 31); break; } case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { ConstantSDNode *CWidth = dyn_cast(Op.getOperand(2)); if (!CWidth) return; uint32_t Width = CWidth->getZExtValue() & 0x1f; if (Opc == AMDGPUISD::BFE_U32) Known.Zero = APInt::getHighBitsSet(32, 32 - Width); break; } case AMDGPUISD::FP_TO_FP16: { unsigned BitWidth = Known.getBitWidth(); // High bits are zero. Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16); break; } case AMDGPUISD::MUL_U24: case AMDGPUISD::MUL_I24: { KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); unsigned TrailZ = LHSKnown.countMinTrailingZeros() + RHSKnown.countMinTrailingZeros(); Known.Zero.setLowBits(std::min(TrailZ, 32u)); // Skip extra check if all bits are known zeros. if (TrailZ >= 32) break; // Truncate to 24 bits. 
LHSKnown = LHSKnown.trunc(24); RHSKnown = RHSKnown.trunc(24); if (Opc == AMDGPUISD::MUL_I24) { unsigned LHSValBits = LHSKnown.countMaxSignificantBits(); unsigned RHSValBits = RHSKnown.countMaxSignificantBits(); unsigned MaxValBits = LHSValBits + RHSValBits; if (MaxValBits > 32) break; unsigned SignBits = 32 - MaxValBits + 1; bool LHSNegative = LHSKnown.isNegative(); bool LHSNonNegative = LHSKnown.isNonNegative(); bool LHSPositive = LHSKnown.isStrictlyPositive(); bool RHSNegative = RHSKnown.isNegative(); bool RHSNonNegative = RHSKnown.isNonNegative(); bool RHSPositive = RHSKnown.isStrictlyPositive(); if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative)) Known.Zero.setHighBits(SignBits); else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative)) Known.One.setHighBits(SignBits); } else { unsigned LHSValBits = LHSKnown.countMaxActiveBits(); unsigned RHSValBits = RHSKnown.countMaxActiveBits(); unsigned MaxValBits = LHSValBits + RHSValBits; if (MaxValBits >= 32) break; Known.Zero.setBitsFrom(MaxValBits); } break; } case AMDGPUISD::PERM: { ConstantSDNode *CMask = dyn_cast(Op.getOperand(2)); if (!CMask) return; KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); unsigned Sel = CMask->getZExtValue(); for (unsigned I = 0; I < 32; I += 8) { unsigned SelBits = Sel & 0xff; if (SelBits < 4) { SelBits *= 8; Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; } else if (SelBits < 7) { SelBits = (SelBits & 3) * 8; Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; } else if (SelBits == 0x0c) { Known.Zero |= 0xFFull << I; } else if (SelBits > 0x0c) { Known.One |= 0xFFull << I; } Sel >>= 8; } break; } case AMDGPUISD::BUFFER_LOAD_UBYTE: { Known.Zero.setHighBits(24); break; } case AMDGPUISD::BUFFER_LOAD_USHORT: { Known.Zero.setHighBits(16); break; } case AMDGPUISD::LDS: { auto GA = cast(Op.getOperand(0).getNode()); Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout()); Known.Zero.setHighBits(16); Known.Zero.setLowBits(Log2(Alignment)); break; } case ISD::INTRINSIC_WO_CHAIN: { unsigned IID = cast(Op.getOperand(0))->getZExtValue(); switch (IID) { case Intrinsic::amdgcn_mbcnt_lo: case Intrinsic::amdgcn_mbcnt_hi: { const GCNSubtarget &ST = DAG.getMachineFunction().getSubtarget(); - // These return at most the wavefront size - 1. + // These return at most the (wavefront size - 1) + src1 + // As long as src1 is an immediate we can calc known bits + KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1); + unsigned Src1ValBits = Src1Known.countMaxActiveBits(); + unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2()); + // Cater for potential carry + MaxActiveBits += Src1ValBits ? 
1 : 0; unsigned Size = Op.getValueType().getSizeInBits(); - Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); + if (MaxActiveBits < Size) + Known.Zero.setHighBits(Size - MaxActiveBits); break; } case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::amdgcn_workitem_id_y: case Intrinsic::amdgcn_workitem_id_z: { unsigned MaxValue = Subtarget->getMaxWorkitemID( DAG.getMachineFunction().getFunction(), workitemIntrinsicDim(IID)); Known.Zero.setHighBits(countLeadingZeros(MaxValue)); break; } default: break; } } } } unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { switch (Op.getOpcode()) { case AMDGPUISD::BFE_I32: { ConstantSDNode *Width = dyn_cast(Op.getOperand(2)); if (!Width) return 1; unsigned SignBits = 32 - Width->getZExtValue() + 1; if (!isNullConstant(Op.getOperand(1))) return SignBits; // TODO: Could probably figure something out with non-0 offsets. unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); return std::max(SignBits, Op0SignBits); } case AMDGPUISD::BFE_U32: { ConstantSDNode *Width = dyn_cast(Op.getOperand(2)); return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1; } case AMDGPUISD::CARRY: case AMDGPUISD::BORROW: return 31; case AMDGPUISD::BUFFER_LOAD_BYTE: return 25; case AMDGPUISD::BUFFER_LOAD_SHORT: return 17; case AMDGPUISD::BUFFER_LOAD_UBYTE: return 24; case AMDGPUISD::BUFFER_LOAD_USHORT: return 16; case AMDGPUISD::FP_TO_FP16: return 16; default: return 1; } } unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr( GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth) const { const MachineInstr *MI = MRI.getVRegDef(R); if (!MI) return 1; // TODO: Check range metadata on MMO. switch (MI->getOpcode()) { case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE: return 25; case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT: return 17; case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE: return 24; case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT: return 16; default: return 1; } } bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN, unsigned Depth) const { unsigned Opcode = Op.getOpcode(); switch (Opcode) { case AMDGPUISD::FMIN_LEGACY: case AMDGPUISD::FMAX_LEGACY: { if (SNaN) return true; // TODO: Can check no nans on one of the operands for each one, but which // one? return false; } case AMDGPUISD::FMUL_LEGACY: case AMDGPUISD::CVT_PKRTZ_F16_F32: { if (SNaN) return true; return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); } case AMDGPUISD::FMED3: case AMDGPUISD::FMIN3: case AMDGPUISD::FMAX3: case AMDGPUISD::FMAD_FTZ: { if (SNaN) return true; return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); } case AMDGPUISD::CVT_F32_UBYTE0: case AMDGPUISD::CVT_F32_UBYTE1: case AMDGPUISD::CVT_F32_UBYTE2: case AMDGPUISD::CVT_F32_UBYTE3: return true; case AMDGPUISD::RCP: case AMDGPUISD::RSQ: case AMDGPUISD::RCP_LEGACY: case AMDGPUISD::RSQ_CLAMP: { if (SNaN) return true; // TODO: Need is known positive check. return false; } case AMDGPUISD::LDEXP: case AMDGPUISD::FRACT: { if (SNaN) return true; return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); } case AMDGPUISD::DIV_SCALE: case AMDGPUISD::DIV_FMAS: case AMDGPUISD::DIV_FIXUP: // TODO: Refine on operands. 
return SNaN; case AMDGPUISD::SIN_HW: case AMDGPUISD::COS_HW: { // TODO: Need check for infinity return SNaN; } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); // TODO: Handle more intrinsics switch (IntrinsicID) { case Intrinsic::amdgcn_cubeid: return true; case Intrinsic::amdgcn_frexp_mant: { if (SNaN) return true; return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); } case Intrinsic::amdgcn_cvt_pkrtz: { if (SNaN) return true; return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); } case Intrinsic::amdgcn_rcp: case Intrinsic::amdgcn_rsq: case Intrinsic::amdgcn_rcp_legacy: case Intrinsic::amdgcn_rsq_legacy: case Intrinsic::amdgcn_rsq_clamp: { if (SNaN) return true; // TODO: Need is known positive check. return false; } case Intrinsic::amdgcn_trig_preop: case Intrinsic::amdgcn_fdot2: // TODO: Refine on operand return SNaN; case Intrinsic::amdgcn_fma_legacy: if (SNaN) return true; return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) && DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1); default: return false; } } default: return false; } } TargetLowering::AtomicExpansionKind AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { switch (RMW->getOperation()) { case AtomicRMWInst::Nand: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: return AtomicExpansionKind::CmpXChg; default: return AtomicExpansionKind::None; } } bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal( unsigned Opc, LLT Ty1, LLT Ty2) const { return (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)) && Ty2 == LLT::scalar(32); } diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 7ec70e42f1c1..34c93be67f80 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -1,503 +1,503 @@ //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This utility works much like "addr2line". It is able of transforming // tuples (module name, module offset) to code locations (function name, // file, line number, column number). It is targeted for compiler-rt tools // (especially AddressSanitizer and ThreadSanitizer) that can use it // to symbolize stack traces in their error reports. 
// //===----------------------------------------------------------------------===// #include "Opts.inc" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/Symbolize/DIPrinter.h" #include "llvm/DebugInfo/Symbolize/Markup.h" #include "llvm/DebugInfo/Symbolize/MarkupFilter.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Debuginfod/DIFetcher.h" #include "llvm/Debuginfod/Debuginfod.h" #include "llvm/Debuginfod/HTTPClient.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/COM.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/Path.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include using namespace llvm; using namespace symbolize; namespace { enum ID { OPT_INVALID = 0, // This is not an option ID. #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES) \ OPT_##ID, #include "Opts.inc" #undef OPTION }; #define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; #include "Opts.inc" #undef PREFIX const opt::OptTable::Info InfoTable[] = { #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES) \ { \ PREFIX, NAME, HELPTEXT, \ METAVAR, OPT_##ID, opt::Option::KIND##Class, \ PARAM, FLAGS, OPT_##GROUP, \ OPT_##ALIAS, ALIASARGS, VALUES}, #include "Opts.inc" #undef OPTION }; class SymbolizerOptTable : public opt::OptTable { public: SymbolizerOptTable() : OptTable(InfoTable) { setGroupedShortOptions(true); } }; } // namespace template static void print(const Request &Request, Expected &ResOrErr, DIPrinter &Printer) { if (ResOrErr) { // No error, print the result. Printer.print(Request, *ResOrErr); return; } // Handle the error. bool PrintEmpty = true; handleAllErrors(std::move(ResOrErr.takeError()), [&](const ErrorInfoBase &EI) { PrintEmpty = Printer.printError( Request, EI, "LLVMSymbolizer: error reading file: "); }); if (PrintEmpty) Printer.print(Request, T()); } enum class OutputStyle { LLVM, GNU, JSON }; enum class Command { Code, Data, Frame, }; static void enableDebuginfod(LLVMSymbolizer &Symbolizer) { static bool IsEnabled = false; if (IsEnabled) return; IsEnabled = true; // Look up symbols using the debuginfod client. Symbolizer.addDIFetcher(std::make_unique()); // The HTTPClient must be initialized for use by the debuginfod client. HTTPClient::initialize(); } static SmallVector parseBuildID(StringRef Str) { std::string Bytes; if (!tryGetFromHex(Str, Bytes)) return {}; ArrayRef BuildID(reinterpret_cast(Bytes.data()), Bytes.size()); return SmallVector(BuildID.begin(), BuildID.end()); } static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, StringRef InputString, Command &Cmd, std::string &ModuleName, SmallVectorImpl &BuildID, uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; if (InputString.consume_front("CODE ")) { Cmd = Command::Code; } else if (InputString.consume_front("DATA ")) { Cmd = Command::Data; } else if (InputString.consume_front("FRAME ")) { Cmd = Command::Frame; } else { // If no cmd, assume it's CODE. Cmd = Command::Code; } const char *Pos; // Skip delimiters and parse input filename (if needed). 
if (BinaryName.empty() && BuildID.empty()) { bool HasFilePrefix = false; bool HasBuildIDPrefix = false; while (true) { if (InputString.consume_front("FILE:")) { if (HasFilePrefix) return false; HasFilePrefix = true; continue; } if (InputString.consume_front("BUILDID:")) { if (HasBuildIDPrefix) return false; HasBuildIDPrefix = true; continue; } break; } if (HasFilePrefix && HasBuildIDPrefix) return false; Pos = InputString.data(); Pos += strspn(Pos, kDelimiters); if (*Pos == '"' || *Pos == '\'') { char Quote = *Pos; Pos++; const char *End = strchr(Pos, Quote); if (!End) return false; ModuleName = std::string(Pos, End - Pos); Pos = End + 1; } else { int NameLength = strcspn(Pos, kDelimiters); ModuleName = std::string(Pos, NameLength); Pos += NameLength; } if (HasBuildIDPrefix) { BuildID = parseBuildID(ModuleName); if (BuildID.empty()) return false; ModuleName.clear(); } } else { Pos = InputString.data(); ModuleName = BinaryName.str(); } // Skip delimiters and parse module offset. Pos += strspn(Pos, kDelimiters); int OffsetLength = strcspn(Pos, kDelimiters); StringRef Offset(Pos, OffsetLength); // GNU addr2line assumes the offset is hexadecimal and allows a redundant // "0x" or "0X" prefix; do the same for compatibility. if (IsAddr2Line) Offset.consume_front("0x") || Offset.consume_front("0X"); return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset); } template void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline, OutputStyle Style, LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { uint64_t AdjustedOffset = Offset - AdjustVMA; object::SectionedAddress Address = {AdjustedOffset, object::SectionedAddress::UndefSection}; if (Cmd == Command::Data) { Expected ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (Cmd == Command::Frame) { Expected> ResOrErr = Symbolizer.symbolizeFrame(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (ShouldInline) { Expected ResOrErr = Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (Style == OutputStyle::GNU) { // With PrintFunctions == FunctionNameKind::LinkageName (default) // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode() // may override the name of an inlined function with the name of the topmost // caller function in the inlining chain. This contradicts the existing // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only // the topmost function, which suits our needs better. Expected ResOrErr = Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); Expected Res0OrErr = !ResOrErr ? Expected(ResOrErr.takeError()) : ((ResOrErr->getNumberOfFrames() == 0) ? 
DILineInfo() : ResOrErr->getFrame(0)); print({ModuleName, Offset}, Res0OrErr, Printer); } else { Expected ResOrErr = Symbolizer.symbolizeCode(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } Symbolizer.pruneCache(); } static void symbolizeInput(const opt::InputArgList &Args, ArrayRef IncomingBuildID, uint64_t AdjustVMA, bool IsAddr2Line, OutputStyle Style, StringRef InputString, LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { Command Cmd; std::string ModuleName; SmallVector BuildID(IncomingBuildID.begin(), IncomingBuildID.end()); uint64_t Offset = 0; if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) { Printer.printInvalidCommand({ModuleName, None}, InputString); return; } bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); if (!BuildID.empty()) { assert(ModuleName.empty()); if (!Args.hasArg(OPT_no_debuginfod)) enableDebuginfod(Symbolizer); std::string BuildIDStr = toHex(BuildID); executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, Style, Symbolizer, Printer); } else { executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline, Style, Symbolizer, Printer); } } static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl, raw_ostream &OS) { const char HelpText[] = " [options] addresses..."; Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(), ToolName.str().c_str()); // TODO Replace this with OptTable API once it adds extrahelp support. OS << "\nPass @FILE as argument to read options from FILE.\n"; } static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line, StringSaver &Saver, SymbolizerOptTable &Tbl) { StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer"; // The environment variable specifies initial options which can be overridden // by commnad line options. Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS" : "LLVM_SYMBOLIZER_OPTS"); bool HasError = false; opt::InputArgList Args = Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { errs() << ("error: " + Msg + "\n"); HasError = true; }); if (HasError) exit(1); if (Args.hasArg(OPT_help)) { printHelp(ToolName, Tbl, outs()); exit(0); } if (Args.hasArg(OPT_version)) { outs() << ToolName << '\n'; cl::PrintVersionMessage(); exit(0); } return Args; } template static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) { if (const opt::Arg *A = Args.getLastArg(ID)) { StringRef V(A->getValue()); if (!llvm::to_integer(V, Value, 0)) { errs() << A->getSpelling() + ": expected a non-negative integer, but got '" + V + "'"; exit(1); } } else { Value = 0; } } static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args, bool IsAddr2Line) { if (Args.hasArg(OPT_functions)) return FunctionNameKind::LinkageName; if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ)) return StringSwitch(A->getValue()) .Case("none", FunctionNameKind::None) .Case("short", FunctionNameKind::ShortName) .Default(FunctionNameKind::LinkageName); return IsAddr2Line ? 
FunctionNameKind::None : FunctionNameKind::LinkageName; } static Optional parseColorArg(const opt::InputArgList &Args) { if (Args.hasArg(OPT_color)) return true; if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ)) return StringSwitch>(A->getValue()) .Case("always", true) .Case("never", false) .Case("auto", None); return None; } static SmallVector parseBuildIDArg(const opt::InputArgList &Args, int ID) { const opt::Arg *A = Args.getLastArg(ID); if (!A) return {}; StringRef V(A->getValue()); SmallVector BuildID = parseBuildID(V); if (BuildID.empty()) { errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n"; exit(1); } return BuildID; } // Symbolize markup from stdin and write the result to stdout. -static void filterMarkup(const opt::InputArgList &Args) { - MarkupFilter Filter(outs(), parseColorArg(Args)); +static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) { + MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args)); std::string InputString; while (std::getline(std::cin, InputString)) { InputString += '\n'; Filter.filter(InputString); } Filter.finish(); } ExitOnError ExitOnErr; int main(int argc, char **argv) { InitLLVM X(argc, argv); sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded); bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line"); BumpPtrAllocator A; StringSaver Saver(A); SymbolizerOptTable Tbl; opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl); LLVMSymbolizer::Options Opts; uint64_t AdjustVMA; PrinterConfig Config; parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA); if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) { Opts.PathStyle = A->getOption().matches(OPT_basenames) ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath; } else { Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath; } Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ); Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str(); Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line); Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str(); Opts.FallbackDebugPath = Args.getLastArgValue(OPT_fallback_debug_path_EQ).str(); Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line); parseIntArg(Args, OPT_print_source_context_lines_EQ, Config.SourceContextLines); Opts.RelativeAddresses = Args.hasArg(OPT_relative_address); Opts.UntagAddresses = Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line); Opts.UseDIA = Args.hasArg(OPT_use_dia); #if !defined(LLVM_ENABLE_DIA_SDK) if (Opts.UseDIA) { WithColor::warning() << "DIA not available; using native PDB reader\n"; Opts.UseDIA = false; } #endif Opts.UseSymbolTable = true; if (Args.hasArg(OPT_cache_size_EQ)) parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize); Config.PrintAddress = Args.hasArg(OPT_addresses); Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None; Config.Pretty = Args.hasArg(OPT_pretty_print); Config.Verbose = Args.hasArg(OPT_verbose); for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) { StringRef Hint(A->getValue()); if (sys::path::extension(Hint) == ".dSYM") { Opts.DsymHints.emplace_back(Hint); } else { errs() << "Warning: invalid dSYM hint: \"" << Hint << "\" (must have the '.dSYM' extension).\n"; } } + LLVMSymbolizer Symbolizer(Opts); + + // A debuginfod lookup could succeed if a HTTP client is available and at + // least one backing URL is 
configured. + bool ShouldUseDebuginfodByDefault = + HTTPClient::isAvailable() && + !ExitOnErr(getDefaultDebuginfodUrls()).empty(); + if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, + ShouldUseDebuginfodByDefault)) + enableDebuginfod(Symbolizer); + if (Args.hasArg(OPT_filter_markup)) { - filterMarkup(Args); + filterMarkup(Args, Symbolizer); return 0; } auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM; if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) { if (strcmp(A->getValue(), "GNU") == 0) Style = OutputStyle::GNU; else if (strcmp(A->getValue(), "JSON") == 0) Style = OutputStyle::JSON; else Style = OutputStyle::LLVM; } if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) { errs() << "error: cannot specify both --build-id and --obj\n"; return EXIT_FAILURE; } SmallVector BuildID = parseBuildIDArg(Args, OPT_build_id_EQ); - LLVMSymbolizer Symbolizer(Opts); - - // A debuginfod lookup could succeed if a HTTP client is available and at - // least one backing URL is configured. - bool ShouldUseDebuginfodByDefault = - HTTPClient::isAvailable() && - !ExitOnErr(getDefaultDebuginfodUrls()).empty(); - if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, - ShouldUseDebuginfodByDefault)) - enableDebuginfod(Symbolizer); - std::unique_ptr Printer; if (Style == OutputStyle::GNU) Printer = std::make_unique(outs(), errs(), Config); else if (Style == OutputStyle::JSON) Printer = std::make_unique(outs(), Config); else Printer = std::make_unique(outs(), errs(), Config); std::vector InputAddresses = Args.getAllArgValues(OPT_INPUT); if (InputAddresses.empty()) { const int kMaxInputStringLength = 1024; char InputString[kMaxInputStringLength]; while (fgets(InputString, sizeof(InputString), stdin)) { // Strip newline characters. std::string StrippedInputString(InputString); llvm::erase_if(StrippedInputString, [](char c) { return c == '\r' || c == '\n'; }); symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, StrippedInputString, Symbolizer, *Printer); outs().flush(); } } else { Printer->listBegin(); for (StringRef Address : InputAddresses) symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer, *Printer); Printer->listEnd(); } return 0; }
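
A minimal standalone sketch of the constant split behind the "v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)" fold in AMDGPUTargetLowering::PerformDAGCombine above. The lo32/hi32 helpers are local stand-ins for llvm::Lo_32/llvm::Hi_32, not the LLVM API itself; the example value is only illustrative.

#include <cassert>
#include <cstdint>

// Split a 64-bit bit pattern into its low and high 32-bit halves, the same
// way the bitcast combine materializes a 64-bit constant as a v2i32
// build_vector.
static std::uint32_t lo32(std::uint64_t V) { return static_cast<std::uint32_t>(V); }
static std::uint32_t hi32(std::uint64_t V) { return static_cast<std::uint32_t>(V >> 32); }

int main() {
  // Bit pattern of the f64 constant 1.0; the combine would emit
  // build_vector(0x00000000, 0x3FF00000) and bitcast it to the result type.
  std::uint64_t K = 0x3FF0000000000000ULL;
  assert(lo32(K) == 0x00000000u);
  assert(hi32(K) == 0x3FF00000u);
  return 0;
}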
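
A standalone sketch of the amdgcn_mbcnt known-bits bound introduced in computeKnownBitsForTargetNode above: the intrinsic returns at most (wavefront size - 1) + src1, so the number of active result bits is bounded by max(active bits of src1, log2 of the wavefront size) plus one possible carry bit. knownZeroHighBits, wavefrontSizeLog2 and src1MaxActiveBits are illustrative names standing in for the KnownBits/GCNSubtarget queries, not LLVM API.

#include <algorithm>
#include <cassert>

// Number of high result bits known to be zero for
//   mbcnt(mask, src1) <= (wavefrontSize - 1) + src1.
static unsigned knownZeroHighBits(unsigned wavefrontSizeLog2,
                                  unsigned src1MaxActiveBits,
                                  unsigned resultBits = 32) {
  // The lane count contributes at most wavefrontSizeLog2 active bits, src1 at
  // most src1MaxActiveBits; adding the two can carry into one extra bit.
  unsigned maxActiveBits =
      std::max(src1MaxActiveBits, wavefrontSizeLog2) +
      (src1MaxActiveBits ? 1u : 0u);
  return maxActiveBits < resultBits ? resultBits - maxActiveBits : 0;
}

int main() {
  // Wave64 with src1 known zero: no carry is possible, so the old bound of
  // 32 - 6 = 26 known-zero high bits is recovered.
  assert(knownZeroHighBits(/*wavefrontSizeLog2=*/6, /*src1MaxActiveBits=*/0) == 26);
  // Wave32 with src1 fitting in 8 bits: at most max(8, 5) + 1 = 9 active bits,
  // so the top 23 bits are known zero.
  assert(knownZeroHighBits(5, 8) == 23);
  return 0;
}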
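
For reference, a few input lines that the parseCommand logic in llvm-symbolizer.cpp above would accept when the tool is started without --obj or --build-id, so the module is taken from the input line itself. This is a reading of the parser as shown, not a full specification of the tool's input grammar.

#include <cstdio>

int main() {
  const char *ExampleInputs[] = {
      "a.out 0x401000",                     // no command keyword: defaults to CODE
      "DATA a.out 0x601040",                // symbolize a data address
      "FRAME \"my lib.so\" 0x1234",         // quoted module names may contain delimiters
      "CODE BUILDID:0123456789abcdef 0x10", // module selected by a hex build ID
  };
  for (const char *Line : ExampleInputs)
    std::printf("%s\n", Line);
  return 0;
}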