Index: head/contrib/compiler-rt
===================================================================
--- head/contrib/compiler-rt (revision 349792)
+++ head/contrib/compiler-rt (revision 349793)
Property changes on: head/contrib/compiler-rt
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/compiler-rt/dist-release_80:r348963-349790
Index: head/contrib/libc++
===================================================================
--- head/contrib/libc++ (revision 349792)
+++ head/contrib/libc++ (revision 349793)
Property changes on: head/contrib/libc++
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/libc++/dist-release_80:r348963-349790
Index: head/contrib/libunwind/src/DwarfInstructions.hpp
===================================================================
--- head/contrib/libunwind/src/DwarfInstructions.hpp (revision 349792)
+++ head/contrib/libunwind/src/DwarfInstructions.hpp (revision 349793)
@@ -1,795 +1,820 @@
//===-------------------------- DwarfInstructions.hpp ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//
// Processor specific interpretation of DWARF unwind info.
//
//===----------------------------------------------------------------------===//
#ifndef __DWARF_INSTRUCTIONS_HPP__
#define __DWARF_INSTRUCTIONS_HPP__
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "dwarf2.h"
#include "Registers.hpp"
#include "DwarfParser.hpp"
#include "config.h"
namespace libunwind {
/// DwarfInstructions maps abstract DWARF unwind instructions to a particular
/// architecture
template <typename A, typename R>
class DwarfInstructions {
public:
typedef typename A::pint_t pint_t;
typedef typename A::sint_t sint_t;
static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart,
R &registers);
private:
enum {
DW_X86_64_RET_ADDR = 16
};
enum {
DW_X86_RET_ADDR = 8
};
typedef typename CFI_Parser<A>::RegisterLocation RegisterLocation;
typedef typename CFI_Parser<A>::PrologInfo PrologInfo;
typedef typename CFI_Parser<A>::FDE_Info FDE_Info;
typedef typename CFI_Parser<A>::CIE_Info CIE_Info;
static pint_t evaluateExpression(pint_t expression, A &addressSpace,
const R &registers,
pint_t initialStackValue);
static pint_t getSavedRegister(A &addressSpace, const R &registers,
pint_t cfa, const RegisterLocation &savedReg);
static double getSavedFloatRegister(A &addressSpace, const R &registers,
pint_t cfa, const RegisterLocation &savedReg);
static v128 getSavedVectorRegister(A &addressSpace, const R &registers,
pint_t cfa, const RegisterLocation &savedReg);
static pint_t getCFA(A &addressSpace, const PrologInfo &prolog,
const R &registers) {
if (prolog.cfaRegister != 0)
return (pint_t)((sint_t)registers.getRegister((int)prolog.cfaRegister) +
prolog.cfaRegisterOffset);
if (prolog.cfaExpression != 0)
return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace,
registers, 0);
assert(0 && "getCFA(): unknown location");
__builtin_unreachable();
}
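// Illustrative note (editorial, not from the original source): with the
// common x86-64 CIE rule "DW_CFA_def_cfa: r7 (rsp) ofs 8", the parser sets
// cfaRegister to 7 and cfaRegisterOffset to 8, so at function entry getCFA()
// returns rsp + 8, the stack pointer value from just before the call pushed
// the return address.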
};
template <typename A, typename R>
typename A::pint_t DwarfInstructions<A, R>::getSavedRegister(
A &addressSpace, const R &registers, pint_t cfa,
const RegisterLocation &savedReg) {
switch (savedReg.location) {
case CFI_Parser<A>::kRegisterInCFA:
return addressSpace.getRegister(cfa + (pint_t)savedReg.value);
case CFI_Parser<A>::kRegisterAtExpression:
return addressSpace.getRegister(
evaluateExpression((pint_t)savedReg.value, addressSpace,
registers, cfa));
case CFI_Parser<A>::kRegisterIsExpression:
return evaluateExpression((pint_t)savedReg.value, addressSpace,
registers, cfa);
case CFI_Parser<A>::kRegisterInRegister:
return registers.getRegister((int)savedReg.value);
case CFI_Parser<A>::kRegisterUnused:
case CFI_Parser<A>::kRegisterOffsetFromCFA:
// FIX ME
break;
}
_LIBUNWIND_ABORT("unsupported restore location for register");
}
template <typename A, typename R>
double DwarfInstructions<A, R>::getSavedFloatRegister(
A &addressSpace, const R &registers, pint_t cfa,
const RegisterLocation &savedReg) {
switch (savedReg.location) {
case CFI_Parser<A>::kRegisterInCFA:
return addressSpace.getDouble(cfa + (pint_t)savedReg.value);
case CFI_Parser<A>::kRegisterAtExpression:
return addressSpace.getDouble(
evaluateExpression((pint_t)savedReg.value, addressSpace,
registers, cfa));
case CFI_Parser<A>::kRegisterIsExpression:
case CFI_Parser<A>::kRegisterUnused:
case CFI_Parser<A>::kRegisterOffsetFromCFA:
case CFI_Parser<A>::kRegisterInRegister:
// FIX ME
break;
}
_LIBUNWIND_ABORT("unsupported restore location for float register");
}
template <typename A, typename R>
v128 DwarfInstructions<A, R>::getSavedVectorRegister(
A &addressSpace, const R &registers, pint_t cfa,
const RegisterLocation &savedReg) {
switch (savedReg.location) {
case CFI_Parser<A>::kRegisterInCFA:
return addressSpace.getVector(cfa + (pint_t)savedReg.value);
case CFI_Parser<A>::kRegisterAtExpression:
return addressSpace.getVector(
evaluateExpression((pint_t)savedReg.value, addressSpace,
registers, cfa));
case CFI_Parser<A>::kRegisterIsExpression:
case CFI_Parser<A>::kRegisterUnused:
case CFI_Parser<A>::kRegisterOffsetFromCFA:
case CFI_Parser<A>::kRegisterInRegister:
// FIX ME
break;
}
_LIBUNWIND_ABORT("unsupported restore location for vector register");
}
template <typename A, typename R>
int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
pint_t fdeStart, R &registers) {
FDE_Info fdeInfo;
CIE_Info cieInfo;
if (CFI_Parser<A>::decodeFDE(addressSpace, fdeStart, &fdeInfo,
&cieInfo) == NULL) {
PrologInfo prolog;
if (CFI_Parser<A>::parseFDEInstructions(addressSpace, fdeInfo, cieInfo, pc,
R::getArch(), &prolog)) {
// get pointer to cfa (architecture specific)
pint_t cfa = getCFA(addressSpace, prolog, registers);
// restore registers that DWARF says were saved
R newRegisters = registers;
pint_t returnAddress = 0;
const int lastReg = R::lastDwarfRegNum();
assert(static_cast<int>(CFI_Parser<A>::kMaxRegisterNumber) >= lastReg &&
"register range too large");
assert(lastReg >= (int)cieInfo.returnAddressRegister &&
"register range does not contain return address register");
for (int i = 0; i <= lastReg; ++i) {
if (prolog.savedRegisters[i].location !=
CFI_Parser<A>::kRegisterUnused) {
if (registers.validFloatRegister(i))
newRegisters.setFloatRegister(
i, getSavedFloatRegister(addressSpace, registers, cfa,
prolog.savedRegisters[i]));
else if (registers.validVectorRegister(i))
newRegisters.setVectorRegister(
i, getSavedVectorRegister(addressSpace, registers, cfa,
prolog.savedRegisters[i]));
else if (i == (int)cieInfo.returnAddressRegister)
returnAddress = getSavedRegister(addressSpace, registers, cfa,
prolog.savedRegisters[i]);
else if (registers.validRegister(i))
newRegisters.setRegister(
i, getSavedRegister(addressSpace, registers, cfa,
prolog.savedRegisters[i]));
else
return UNW_EBADREG;
}
}
// By definition, the CFA is the stack pointer at the call site, so
// restoring SP means setting it to CFA.
newRegisters.setSP(cfa);
#if defined(_LIBUNWIND_TARGET_AARCH64)
// If the target is aarch64 then the return address may have been signed
// using the v8.3 pointer authentication extensions. The original
// return address needs to be authenticated before the return address is
// restored. autia1716 is used instead of autia as autia1716 assembles
// to a NOP on pre-v8.3a architectures.
if ((R::getArch() == REGISTERS_ARM64) &&
prolog.savedRegisters[UNW_ARM64_RA_SIGN_STATE].value) {
#if !defined(_LIBUNWIND_IS_NATIVE_ONLY)
return UNW_ECROSSRASIGNING;
#else
register unsigned long long x17 __asm("x17") = returnAddress;
register unsigned long long x16 __asm("x16") = cfa;
// These are the autia1716/autib1716 instructions. The hint instructions
// are used here as gcc does not assemble autia1716/autib1716 for
// pre-armv8.3a targets.
if (cieInfo.addressesSignedWithBKey)
asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716
else
asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716
returnAddress = x17;
#endif
}
#endif
#if defined(_LIBUNWIND_TARGET_SPARC)
if (R::getArch() == REGISTERS_SPARC) {
// Skip call site instruction and delay slot
returnAddress += 8;
// Skip unimp instruction if function returns a struct
if ((addressSpace.get32(returnAddress) & 0xC1C00000) == 0)
returnAddress += 4;
}
#endif
+#if defined(_LIBUNWIND_TARGET_PPC64)
+#define PPC64_ELFV1_R2_LOAD_INST_ENCODING 0xe8410028u // ld r2,40(r1)
+#define PPC64_ELFV1_R2_OFFSET 40
+#define PPC64_ELFV2_R2_LOAD_INST_ENCODING 0xe8410018u // ld r2,24(r1)
+#define PPC64_ELFV2_R2_OFFSET 24
+ // If the instruction at return address is a TOC (r2) restore,
+ // then r2 was saved and needs to be restored.
+ // ELFv2 ABI specifies that the TOC Pointer must be saved at SP + 24,
+ // while in ELFv1 ABI it is saved at SP + 40.
+ if (R::getArch() == REGISTERS_PPC64 && returnAddress != 0) {
+ pint_t sp = newRegisters.getRegister(UNW_REG_SP);
+ pint_t r2 = 0;
+ switch (addressSpace.get32(returnAddress)) {
+ case PPC64_ELFV1_R2_LOAD_INST_ENCODING:
+ r2 = addressSpace.get64(sp + PPC64_ELFV1_R2_OFFSET);
+ break;
+ case PPC64_ELFV2_R2_LOAD_INST_ENCODING:
+ r2 = addressSpace.get64(sp + PPC64_ELFV2_R2_OFFSET);
+ break;
+ }
+ if (r2)
+ newRegisters.setRegister(UNW_PPC64_R2, r2);
+ }
+#endif
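+// Editorial note on the constants above (a sketch derived from the Power ISA
+// "ld" format, not part of the original change): primary opcode 58 with
+// RT=r2 and RA=r1 packs into the upper halfword 0xe841, and since ld's two
+// low XO bits are zero, the lower halfword DS<<2 equals the byte offset
+// itself: 0x0028 is 40 (ELFv1) and 0x0018 is 24 (ELFv2).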
+
// Return address is the address after the call site instruction, so setting
// IP to that simulates a return.
newRegisters.setIP(returnAddress);
// Simulate the step by replacing the register set with the new ones.
registers = newRegisters;
return UNW_STEP_SUCCESS;
}
}
return UNW_EBADFRAME;
}
template <typename A, typename R>
typename A::pint_t
DwarfInstructions<A, R>::evaluateExpression(pint_t expression, A &addressSpace,
const R &registers,
pint_t initialStackValue) {
const bool log = false;
pint_t p = expression;
pint_t expressionEnd = expression + 20; // temp, until len read
pint_t length = (pint_t)addressSpace.getULEB128(p, expressionEnd);
expressionEnd = p + length;
if (log)
fprintf(stderr, "evaluateExpression(): length=%" PRIu64 "\n",
(uint64_t)length);
pint_t stack[100];
pint_t *sp = stack;
*(++sp) = initialStackValue;
while (p < expressionEnd) {
if (log) {
for (pint_t *t = sp; t > stack; --t) {
fprintf(stderr, "sp[] = 0x%" PRIx64 "\n", (uint64_t)(*t));
}
}
uint8_t opcode = addressSpace.get8(p++);
sint_t svalue, svalue2;
pint_t value;
uint32_t reg;
switch (opcode) {
case DW_OP_addr:
// push immediate address sized value
value = addressSpace.getP(p);
p += sizeof(pint_t);
*(++sp) = value;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_deref:
// pop stack, dereference, push result
value = *sp--;
*(++sp) = addressSpace.getP(value);
if (log)
fprintf(stderr, "dereference 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_const1u:
// push immediate 1 byte value
value = addressSpace.get8(p);
p += 1;
*(++sp) = value;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_const1s:
// push immediate 1 byte signed value
svalue = (int8_t) addressSpace.get8(p);
p += 1;
*(++sp) = (pint_t)svalue;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
break;
case DW_OP_const2u:
// push immediate 2 byte value
value = addressSpace.get16(p);
p += 2;
*(++sp) = value;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_const2s:
// push immediate 2 byte signed value
svalue = (int16_t) addressSpace.get16(p);
p += 2;
*(++sp) = (pint_t)svalue;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
break;
case DW_OP_const4u:
// push immediate 4 byte value
value = addressSpace.get32(p);
p += 4;
*(++sp) = value;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_const4s:
// push immediate 4 byte signed value
svalue = (int32_t)addressSpace.get32(p);
p += 4;
*(++sp) = (pint_t)svalue;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
break;
case DW_OP_const8u:
// push immediate 8 byte value
value = (pint_t)addressSpace.get64(p);
p += 8;
*(++sp) = value;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_const8s:
// push immediate 8 byte signed value
value = (pint_t)addressSpace.get64(p);
p += 8;
*(++sp) = value;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_constu:
// push immediate ULEB128 value
value = (pint_t)addressSpace.getULEB128(p, expressionEnd);
*(++sp) = value;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_consts:
// push immediate SLEB128 value
svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
*(++sp) = (pint_t)svalue;
if (log)
fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
break;
case DW_OP_dup:
// push top of stack
value = *sp;
*(++sp) = value;
if (log)
fprintf(stderr, "duplicate top of stack\n");
break;
case DW_OP_drop:
// pop
--sp;
if (log)
fprintf(stderr, "pop top of stack\n");
break;
case DW_OP_over:
// dup second
value = sp[-1];
*(++sp) = value;
if (log)
fprintf(stderr, "duplicate second in stack\n");
break;
case DW_OP_pick:
// duplicate nth entry from top of stack
reg = addressSpace.get8(p);
p += 1;
value = sp[-reg];
*(++sp) = value;
if (log)
fprintf(stderr, "duplicate %d in stack\n", reg);
break;
case DW_OP_swap:
// swap top two
value = sp[0];
sp[0] = sp[-1];
sp[-1] = value;
if (log)
fprintf(stderr, "swap top of stack\n");
break;
case DW_OP_rot:
// rotate top three
value = sp[0];
sp[0] = sp[-1];
sp[-1] = sp[-2];
sp[-2] = value;
if (log)
fprintf(stderr, "rotate top three of stack\n");
break;
case DW_OP_xderef:
// pop stack, dereference, push result
value = *sp--;
*sp = *((pint_t*)value);
if (log)
fprintf(stderr, "x-dereference 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_abs:
svalue = (sint_t)*sp;
if (svalue < 0)
*sp = (pint_t)(-svalue);
if (log)
fprintf(stderr, "abs\n");
break;
case DW_OP_and:
value = *sp--;
*sp &= value;
if (log)
fprintf(stderr, "and\n");
break;
case DW_OP_div:
svalue = (sint_t)(*sp--);
svalue2 = (sint_t)*sp;
*sp = (pint_t)(svalue2 / svalue);
if (log)
fprintf(stderr, "div\n");
break;
case DW_OP_minus:
value = *sp--;
*sp = *sp - value;
if (log)
fprintf(stderr, "minus\n");
break;
case DW_OP_mod:
svalue = (sint_t)(*sp--);
svalue2 = (sint_t)*sp;
*sp = (pint_t)(svalue2 % svalue);
if (log)
fprintf(stderr, "module\n");
break;
case DW_OP_mul:
svalue = (sint_t)(*sp--);
svalue2 = (sint_t)*sp;
*sp = (pint_t)(svalue2 * svalue);
if (log)
fprintf(stderr, "mul\n");
break;
case DW_OP_neg:
*sp = 0 - *sp;
if (log)
fprintf(stderr, "neg\n");
break;
case DW_OP_not:
svalue = (sint_t)(*sp);
*sp = (pint_t)(~svalue);
if (log)
fprintf(stderr, "not\n");
break;
case DW_OP_or:
value = *sp--;
*sp |= value;
if (log)
fprintf(stderr, "or\n");
break;
case DW_OP_plus:
value = *sp--;
*sp += value;
if (log)
fprintf(stderr, "plus\n");
break;
case DW_OP_plus_uconst:
// pop stack, add uleb128 constant, push result
*sp += static_cast<pint_t>(addressSpace.getULEB128(p, expressionEnd));
if (log)
fprintf(stderr, "add constant\n");
break;
case DW_OP_shl:
value = *sp--;
*sp = *sp << value;
if (log)
fprintf(stderr, "shift left\n");
break;
case DW_OP_shr:
value = *sp--;
*sp = *sp >> value;
if (log)
fprintf(stderr, "shift left\n");
break;
case DW_OP_shra:
value = *sp--;
svalue = (sint_t)*sp;
*sp = (pint_t)(svalue >> value);
if (log)
fprintf(stderr, "shift left arithmetric\n");
break;
case DW_OP_xor:
value = *sp--;
*sp ^= value;
if (log)
fprintf(stderr, "xor\n");
break;
case DW_OP_skip:
svalue = (int16_t) addressSpace.get16(p);
p += 2;
p = (pint_t)((sint_t)p + svalue);
if (log)
fprintf(stderr, "skip %" PRIu64 "\n", (uint64_t)svalue);
break;
case DW_OP_bra:
svalue = (int16_t) addressSpace.get16(p);
p += 2;
if (*sp--)
p = (pint_t)((sint_t)p + svalue);
if (log)
fprintf(stderr, "bra %" PRIu64 "\n", (uint64_t)svalue);
break;
case DW_OP_eq:
value = *sp--;
*sp = (*sp == value);
if (log)
fprintf(stderr, "eq\n");
break;
case DW_OP_ge:
value = *sp--;
*sp = (*sp >= value);
if (log)
fprintf(stderr, "ge\n");
break;
case DW_OP_gt:
value = *sp--;
*sp = (*sp > value);
if (log)
fprintf(stderr, "gt\n");
break;
case DW_OP_le:
value = *sp--;
*sp = (*sp <= value);
if (log)
fprintf(stderr, "le\n");
break;
case DW_OP_lt:
value = *sp--;
*sp = (*sp < value);
if (log)
fprintf(stderr, "lt\n");
break;
case DW_OP_ne:
value = *sp--;
*sp = (*sp != value);
if (log)
fprintf(stderr, "ne\n");
break;
case DW_OP_lit0:
case DW_OP_lit1:
case DW_OP_lit2:
case DW_OP_lit3:
case DW_OP_lit4:
case DW_OP_lit5:
case DW_OP_lit6:
case DW_OP_lit7:
case DW_OP_lit8:
case DW_OP_lit9:
case DW_OP_lit10:
case DW_OP_lit11:
case DW_OP_lit12:
case DW_OP_lit13:
case DW_OP_lit14:
case DW_OP_lit15:
case DW_OP_lit16:
case DW_OP_lit17:
case DW_OP_lit18:
case DW_OP_lit19:
case DW_OP_lit20:
case DW_OP_lit21:
case DW_OP_lit22:
case DW_OP_lit23:
case DW_OP_lit24:
case DW_OP_lit25:
case DW_OP_lit26:
case DW_OP_lit27:
case DW_OP_lit28:
case DW_OP_lit29:
case DW_OP_lit30:
case DW_OP_lit31:
value = static_cast<pint_t>(opcode - DW_OP_lit0);
*(++sp) = value;
if (log)
fprintf(stderr, "push literal 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_reg0:
case DW_OP_reg1:
case DW_OP_reg2:
case DW_OP_reg3:
case DW_OP_reg4:
case DW_OP_reg5:
case DW_OP_reg6:
case DW_OP_reg7:
case DW_OP_reg8:
case DW_OP_reg9:
case DW_OP_reg10:
case DW_OP_reg11:
case DW_OP_reg12:
case DW_OP_reg13:
case DW_OP_reg14:
case DW_OP_reg15:
case DW_OP_reg16:
case DW_OP_reg17:
case DW_OP_reg18:
case DW_OP_reg19:
case DW_OP_reg20:
case DW_OP_reg21:
case DW_OP_reg22:
case DW_OP_reg23:
case DW_OP_reg24:
case DW_OP_reg25:
case DW_OP_reg26:
case DW_OP_reg27:
case DW_OP_reg28:
case DW_OP_reg29:
case DW_OP_reg30:
case DW_OP_reg31:
reg = static_cast<uint32_t>(opcode - DW_OP_reg0);
*(++sp) = registers.getRegister((int)reg);
if (log)
fprintf(stderr, "push reg %d\n", reg);
break;
case DW_OP_regx:
reg = static_cast<uint32_t>(addressSpace.getULEB128(p, expressionEnd));
*(++sp) = registers.getRegister((int)reg);
if (log)
fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue);
break;
case DW_OP_breg0:
case DW_OP_breg1:
case DW_OP_breg2:
case DW_OP_breg3:
case DW_OP_breg4:
case DW_OP_breg5:
case DW_OP_breg6:
case DW_OP_breg7:
case DW_OP_breg8:
case DW_OP_breg9:
case DW_OP_breg10:
case DW_OP_breg11:
case DW_OP_breg12:
case DW_OP_breg13:
case DW_OP_breg14:
case DW_OP_breg15:
case DW_OP_breg16:
case DW_OP_breg17:
case DW_OP_breg18:
case DW_OP_breg19:
case DW_OP_breg20:
case DW_OP_breg21:
case DW_OP_breg22:
case DW_OP_breg23:
case DW_OP_breg24:
case DW_OP_breg25:
case DW_OP_breg26:
case DW_OP_breg27:
case DW_OP_breg28:
case DW_OP_breg29:
case DW_OP_breg30:
case DW_OP_breg31:
reg = static_cast<uint32_t>(opcode - DW_OP_breg0);
svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
svalue += static_cast<sint_t>(registers.getRegister((int)reg));
*(++sp) = (pint_t)(svalue);
if (log)
fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue);
break;
case DW_OP_bregx:
reg = static_cast<uint32_t>(addressSpace.getULEB128(p, expressionEnd));
svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
svalue += static_cast<sint_t>(registers.getRegister((int)reg));
*(++sp) = (pint_t)(svalue);
if (log)
fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue);
break;
case DW_OP_fbreg:
_LIBUNWIND_ABORT("DW_OP_fbreg not implemented");
break;
case DW_OP_piece:
_LIBUNWIND_ABORT("DW_OP_piece not implemented");
break;
case DW_OP_deref_size:
// pop stack, dereference, push result
value = *sp--;
switch (addressSpace.get8(p++)) {
case 1:
value = addressSpace.get8(value);
break;
case 2:
value = addressSpace.get16(value);
break;
case 4:
value = addressSpace.get32(value);
break;
case 8:
value = (pint_t)addressSpace.get64(value);
break;
default:
_LIBUNWIND_ABORT("DW_OP_deref_size with bad size");
}
*(++sp) = value;
if (log)
fprintf(stderr, "sized dereference 0x%" PRIx64 "\n", (uint64_t)value);
break;
case DW_OP_xderef_size:
case DW_OP_nop:
case DW_OP_push_object_addres:
case DW_OP_call2:
case DW_OP_call4:
case DW_OP_call_ref:
default:
_LIBUNWIND_ABORT("DWARF opcode not implemented");
}
}
if (log)
fprintf(stderr, "expression evaluates to 0x%" PRIx64 "\n", (uint64_t)*sp);
return *sp;
}
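// Illustrative trace (editorial sketch, assuming x86-64 DWARF numbering
// where register 7 is rsp): the expression bytes
// { ULEB128 length 3, DW_OP_breg7, SLEB128 16, DW_OP_deref }
// first push rsp + 16, then pop that address and push the value loaded from
// it, so evaluateExpression() returns *(rsp + 16).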
} // namespace libunwind
#endif // __DWARF_INSTRUCTIONS_HPP__
Index: head/contrib/libunwind/src/assembly.h
===================================================================
--- head/contrib/libunwind/src/assembly.h (revision 349792)
+++ head/contrib/libunwind/src/assembly.h (revision 349793)
@@ -1,122 +1,138 @@
/* ===-- assembly.h - libUnwind assembler support macros -------------------===
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
* ===----------------------------------------------------------------------===
*
* This file defines macros for use in libUnwind assembler source.
* This file is not part of the interface of this library.
*
* ===----------------------------------------------------------------------===
*/
#ifndef UNWIND_ASSEMBLY_H
#define UNWIND_ASSEMBLY_H
#if defined(__powerpc64__)
#define SEPARATOR ;
#define PPC64_OFFS_SRR0 0
#define PPC64_OFFS_CR 272
#define PPC64_OFFS_XER 280
#define PPC64_OFFS_LR 288
#define PPC64_OFFS_CTR 296
#define PPC64_OFFS_VRSAVE 304
#define PPC64_OFFS_FP 312
#define PPC64_OFFS_V 824
#ifdef _ARCH_PWR8
#define PPC64_HAS_VMX
#endif
#elif defined(__arm64__)
#define SEPARATOR %%
#else
#define SEPARATOR ;
#endif
+#if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1)
+#define PPC64_OPD1 .section .opd,"aw",@progbits SEPARATOR
+#define PPC64_OPD2 SEPARATOR \
+ .p2align 3 SEPARATOR \
+ .quad .Lfunc_begin0 SEPARATOR \
+ .quad .TOC.@tocbase SEPARATOR \
+ .quad 0 SEPARATOR \
+ .text SEPARATOR \
+.Lfunc_begin0:
+#else
+#define PPC64_OPD1
+#define PPC64_OPD2
+#endif
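+/* Editorial sketch of the expansion (not part of the original change): under
+ * ELFv1, DEFINE_LIBUNWIND_FUNCTION(foo) below now emits a function
+ * descriptor in .opd, so "foo" labels three doublewords (the entry point,
+ * .TOC.@tocbase, and a zero environment pointer) while the code itself
+ * starts at the local .Lfunc_begin0 label back in .text. Under ELFv2, and on
+ * other targets, both macros expand to nothing and "foo" labels the code
+ * directly. */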
+
#define GLUE2(a, b) a ## b
#define GLUE(a, b) GLUE2(a, b)
#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
#if defined(__APPLE__)
#define SYMBOL_IS_FUNC(name)
#define EXPORT_SYMBOL(name)
#define HIDDEN_SYMBOL(name) .private_extern name
#define NO_EXEC_STACK_DIRECTIVE
#elif defined(__ELF__)
#if defined(__arm__)
#define SYMBOL_IS_FUNC(name) .type name,%function
#else
#define SYMBOL_IS_FUNC(name) .type name,@function
#endif
#define EXPORT_SYMBOL(name)
#define HIDDEN_SYMBOL(name) .hidden name
#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
defined(__linux__)
#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
#else
#define NO_EXEC_STACK_DIRECTIVE
#endif
#elif defined(_WIN32)
#define SYMBOL_IS_FUNC(name) \
.def name SEPARATOR \
.scl 2 SEPARATOR \
.type 32 SEPARATOR \
.endef
#define EXPORT_SYMBOL2(name) \
.section .drectve,"yn" SEPARATOR \
.ascii "-export:", #name, "\0" SEPARATOR \
.text
#if defined(_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS)
#define EXPORT_SYMBOL(name)
#else
#define EXPORT_SYMBOL(name) EXPORT_SYMBOL2(name)
#endif
#define HIDDEN_SYMBOL(name)
#define NO_EXEC_STACK_DIRECTIVE
#elif defined(__sparc__)
#else
#error Unsupported target
#endif
#define DEFINE_LIBUNWIND_FUNCTION(name) \
.globl SYMBOL_NAME(name) SEPARATOR \
EXPORT_SYMBOL(name) SEPARATOR \
SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
- SYMBOL_NAME(name):
+ PPC64_OPD1 \
+ SYMBOL_NAME(name): \
+ PPC64_OPD2
#define DEFINE_LIBUNWIND_PRIVATE_FUNCTION(name) \
.globl SYMBOL_NAME(name) SEPARATOR \
HIDDEN_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \
SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
SYMBOL_NAME(name):
#if defined(__arm__)
#if !defined(__ARM_ARCH)
#define __ARM_ARCH 4
#endif
#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
#define ARM_HAS_BX
#endif
#ifdef ARM_HAS_BX
#define JMP(r) bx r
#else
#define JMP(r) mov pc, r
#endif
#endif /* __arm__ */
#endif /* UNWIND_ASSEMBLY_H */
Index: head/contrib/libunwind
===================================================================
--- head/contrib/libunwind (revision 349792)
+++ head/contrib/libunwind (revision 349793)
Property changes on: head/contrib/libunwind
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/llvm-libunwind/dist-release_80:r348963-349790
Index: head/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp (revision 349792)
+++ head/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp (revision 349793)
@@ -1,3488 +1,3491 @@
//===--- MicrosoftMangle.cpp - Microsoft Visual C++ Name Mangling ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides C++ name mangling targeting the Microsoft Visual C++ ABI.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/Mangle.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/ABI.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/JamCRC.h"
#include "llvm/Support/xxhash.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
using namespace clang;
namespace {
struct msvc_hashing_ostream : public llvm::raw_svector_ostream {
raw_ostream &OS;
llvm::SmallString<64> Buffer;
msvc_hashing_ostream(raw_ostream &OS)
: llvm::raw_svector_ostream(Buffer), OS(OS) {}
~msvc_hashing_ostream() override {
StringRef MangledName = str();
bool StartsWithEscape = MangledName.startswith("\01");
if (StartsWithEscape)
MangledName = MangledName.drop_front(1);
if (MangledName.size() <= 4096) {
OS << str();
return;
}
llvm::MD5 Hasher;
llvm::MD5::MD5Result Hash;
Hasher.update(MangledName);
Hasher.final(Hash);
SmallString<32> HexString;
llvm::MD5::stringifyResult(Hash, HexString);
if (StartsWithEscape)
OS << '\01';
OS << "??@" << HexString << '@';
}
};
static const DeclContext *
getLambdaDefaultArgumentDeclContext(const Decl *D) {
if (const auto *RD = dyn_cast<CXXRecordDecl>(D))
if (RD->isLambda())
if (const auto *Parm =
dyn_cast_or_null<ParmVarDecl>(RD->getLambdaContextDecl()))
return Parm->getDeclContext();
return nullptr;
}
/// Retrieve the declaration context that should be used when mangling
/// the given declaration.
static const DeclContext *getEffectiveDeclContext(const Decl *D) {
// The ABI assumes that lambda closure types that occur within
// default arguments live in the context of the function. However, due to
// the way in which Clang parses and creates function declarations, this is
// not the case: the lambda closure type ends up living in the context
// where the function itself resides, because the function declaration itself
// had not yet been created. Fix the context here.
if (const auto *LDADC = getLambdaDefaultArgumentDeclContext(D))
return LDADC;
// Perform the same check for block literals.
if (const BlockDecl *BD = dyn_cast<BlockDecl>(D)) {
if (ParmVarDecl *ContextParam =
dyn_cast_or_null<ParmVarDecl>(BD->getBlockManglingContextDecl()))
return ContextParam->getDeclContext();
}
const DeclContext *DC = D->getDeclContext();
if (isa<CapturedDecl>(DC) || isa<OMPDeclareReductionDecl>(DC)) {
return getEffectiveDeclContext(cast<Decl>(DC));
}
return DC->getRedeclContext();
}
static const DeclContext *getEffectiveParentContext(const DeclContext *DC) {
return getEffectiveDeclContext(cast<Decl>(DC));
}
static const FunctionDecl *getStructor(const NamedDecl *ND) {
if (const auto *FTD = dyn_cast<FunctionTemplateDecl>(ND))
return FTD->getTemplatedDecl()->getCanonicalDecl();
const auto *FD = cast<FunctionDecl>(ND);
if (const auto *FTD = FD->getPrimaryTemplate())
return FTD->getTemplatedDecl()->getCanonicalDecl();
return FD->getCanonicalDecl();
}
/// MicrosoftMangleContextImpl - Overrides the default MangleContext for the
/// Microsoft Visual C++ ABI.
class MicrosoftMangleContextImpl : public MicrosoftMangleContext {
typedef std::pair<const DeclContext *, IdentifierInfo *> DiscriminatorKeyTy;
llvm::DenseMap<DiscriminatorKeyTy, unsigned> Discriminator;
llvm::DenseMap<const NamedDecl *, unsigned> Uniquifier;
llvm::DenseMap<const CXXRecordDecl *, unsigned> LambdaIds;
llvm::DenseMap<const NamedDecl *, unsigned> SEHFilterIds;
llvm::DenseMap<const NamedDecl *, unsigned> SEHFinallyIds;
SmallString<16> AnonymousNamespaceHash;
public:
MicrosoftMangleContextImpl(ASTContext &Context, DiagnosticsEngine &Diags);
bool shouldMangleCXXName(const NamedDecl *D) override;
bool shouldMangleStringLiteral(const StringLiteral *SL) override;
void mangleCXXName(const NamedDecl *D, raw_ostream &Out) override;
void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD,
const MethodVFTableLocation &ML,
raw_ostream &Out) override;
void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk,
raw_ostream &) override;
void mangleCXXDtorThunk(const CXXDestructorDecl *DD, CXXDtorType Type,
const ThisAdjustment &ThisAdjustment,
raw_ostream &) override;
void mangleCXXVFTable(const CXXRecordDecl *Derived,
ArrayRef<const CXXRecordDecl *> BasePath,
raw_ostream &Out) override;
void mangleCXXVBTable(const CXXRecordDecl *Derived,
ArrayRef<const CXXRecordDecl *> BasePath,
raw_ostream &Out) override;
void mangleCXXVirtualDisplacementMap(const CXXRecordDecl *SrcRD,
const CXXRecordDecl *DstRD,
raw_ostream &Out) override;
void mangleCXXThrowInfo(QualType T, bool IsConst, bool IsVolatile,
bool IsUnaligned, uint32_t NumEntries,
raw_ostream &Out) override;
void mangleCXXCatchableTypeArray(QualType T, uint32_t NumEntries,
raw_ostream &Out) override;
void mangleCXXCatchableType(QualType T, const CXXConstructorDecl *CD,
CXXCtorType CT, uint32_t Size, uint32_t NVOffset,
int32_t VBPtrOffset, uint32_t VBIndex,
raw_ostream &Out) override;
void mangleCXXRTTI(QualType T, raw_ostream &Out) override;
void mangleCXXRTTIName(QualType T, raw_ostream &Out) override;
void mangleCXXRTTIBaseClassDescriptor(const CXXRecordDecl *Derived,
uint32_t NVOffset, int32_t VBPtrOffset,
uint32_t VBTableOffset, uint32_t Flags,
raw_ostream &Out) override;
void mangleCXXRTTIBaseClassArray(const CXXRecordDecl *Derived,
raw_ostream &Out) override;
void mangleCXXRTTIClassHierarchyDescriptor(const CXXRecordDecl *Derived,
raw_ostream &Out) override;
void
mangleCXXRTTICompleteObjectLocator(const CXXRecordDecl *Derived,
ArrayRef<const CXXRecordDecl *> BasePath,
raw_ostream &Out) override;
void mangleTypeName(QualType T, raw_ostream &) override;
void mangleCXXCtor(const CXXConstructorDecl *D, CXXCtorType Type,
raw_ostream &) override;
void mangleCXXDtor(const CXXDestructorDecl *D, CXXDtorType Type,
raw_ostream &) override;
void mangleReferenceTemporary(const VarDecl *, unsigned ManglingNumber,
raw_ostream &) override;
void mangleStaticGuardVariable(const VarDecl *D, raw_ostream &Out) override;
void mangleThreadSafeStaticGuardVariable(const VarDecl *D, unsigned GuardNum,
raw_ostream &Out) override;
void mangleDynamicInitializer(const VarDecl *D, raw_ostream &Out) override;
void mangleDynamicAtExitDestructor(const VarDecl *D,
raw_ostream &Out) override;
void mangleSEHFilterExpression(const NamedDecl *EnclosingDecl,
raw_ostream &Out) override;
void mangleSEHFinallyBlock(const NamedDecl *EnclosingDecl,
raw_ostream &Out) override;
void mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) override;
bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) {
const DeclContext *DC = getEffectiveDeclContext(ND);
if (!DC->isFunctionOrMethod())
return false;
// Lambda closure types are already numbered, give out a phony number so
// that they demangle nicely.
if (const auto *RD = dyn_cast<CXXRecordDecl>(ND)) {
if (RD->isLambda()) {
disc = 1;
return true;
}
}
// Use the canonical number for externally visible decls.
if (ND->isExternallyVisible()) {
disc = getASTContext().getManglingNumber(ND);
return true;
}
// Anonymous tags are already numbered.
if (const TagDecl *Tag = dyn_cast<TagDecl>(ND)) {
if (!Tag->hasNameForLinkage() &&
!getASTContext().getDeclaratorForUnnamedTagDecl(Tag) &&
!getASTContext().getTypedefNameForUnnamedTagDecl(Tag))
return false;
}
// Make up a reasonable number for internal decls.
unsigned &discriminator = Uniquifier[ND];
if (!discriminator)
discriminator = ++Discriminator[std::make_pair(DC, ND->getIdentifier())];
disc = discriminator + 1;
return true;
}
unsigned getLambdaId(const CXXRecordDecl *RD) {
assert(RD->isLambda() && "RD must be a lambda!");
assert(!RD->isExternallyVisible() && "RD must not be visible!");
assert(RD->getLambdaManglingNumber() == 0 &&
"RD must not have a mangling number!");
std::pair<llvm::DenseMap<const CXXRecordDecl *, unsigned>::iterator, bool>
Result = LambdaIds.insert(std::make_pair(RD, LambdaIds.size()));
return Result.first->second;
}
/// Return a character sequence that is (somewhat) unique to the TU suitable
/// for mangling anonymous namespaces.
StringRef getAnonymousNamespaceHash() const {
return AnonymousNamespaceHash;
}
private:
void mangleInitFiniStub(const VarDecl *D, char CharCode, raw_ostream &Out);
};
/// MicrosoftCXXNameMangler - Manage the mangling of a single name for the
/// Microsoft Visual C++ ABI.
class MicrosoftCXXNameMangler {
MicrosoftMangleContextImpl &Context;
raw_ostream &Out;
/// The "structor" is the top-level declaration being mangled, if
/// that's not a template specialization; otherwise it's the pattern
/// for that specialization.
const NamedDecl *Structor;
unsigned StructorType;
typedef llvm::SmallVector<std::string, 10> BackRefVec;
BackRefVec NameBackReferences;
typedef llvm::DenseMap<const void *, unsigned> ArgBackRefMap;
ArgBackRefMap TypeBackReferences;
typedef std::set<int> PassObjectSizeArgsSet;
PassObjectSizeArgsSet PassObjectSizeArgs;
ASTContext &getASTContext() const { return Context.getASTContext(); }
// FIXME: If we add support for __ptr32/64 qualifiers, then we should push
// this check into mangleQualifiers().
const bool PointersAre64Bit;
public:
enum QualifierMangleMode { QMM_Drop, QMM_Mangle, QMM_Escape, QMM_Result };
MicrosoftCXXNameMangler(MicrosoftMangleContextImpl &C, raw_ostream &Out_)
: Context(C), Out(Out_), Structor(nullptr), StructorType(-1),
PointersAre64Bit(C.getASTContext().getTargetInfo().getPointerWidth(0) ==
64) {}
MicrosoftCXXNameMangler(MicrosoftMangleContextImpl &C, raw_ostream &Out_,
const CXXConstructorDecl *D, CXXCtorType Type)
: Context(C), Out(Out_), Structor(getStructor(D)), StructorType(Type),
PointersAre64Bit(C.getASTContext().getTargetInfo().getPointerWidth(0) ==
64) {}
MicrosoftCXXNameMangler(MicrosoftMangleContextImpl &C, raw_ostream &Out_,
const CXXDestructorDecl *D, CXXDtorType Type)
: Context(C), Out(Out_), Structor(getStructor(D)), StructorType(Type),
PointersAre64Bit(C.getASTContext().getTargetInfo().getPointerWidth(0) ==
64) {}
raw_ostream &getStream() const { return Out; }
void mangle(const NamedDecl *D, StringRef Prefix = "?");
void mangleName(const NamedDecl *ND);
void mangleFunctionEncoding(const FunctionDecl *FD, bool ShouldMangle);
void mangleVariableEncoding(const VarDecl *VD);
void mangleMemberDataPointer(const CXXRecordDecl *RD, const ValueDecl *VD);
void mangleMemberFunctionPointer(const CXXRecordDecl *RD,
const CXXMethodDecl *MD);
void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD,
const MethodVFTableLocation &ML);
void mangleNumber(int64_t Number);
void mangleTagTypeKind(TagTypeKind TK);
void mangleArtificialTagType(TagTypeKind TK, StringRef UnqualifiedName,
ArrayRef<StringRef> NestedNames = None);
void mangleAddressSpaceType(QualType T, Qualifiers Quals, SourceRange Range);
void mangleType(QualType T, SourceRange Range,
QualifierMangleMode QMM = QMM_Mangle);
void mangleFunctionType(const FunctionType *T,
const FunctionDecl *D = nullptr,
bool ForceThisQuals = false,
bool MangleExceptionSpec = true);
void mangleNestedName(const NamedDecl *ND);
private:
bool isStructorDecl(const NamedDecl *ND) const {
return ND == Structor || getStructor(ND) == Structor;
}
void mangleUnqualifiedName(const NamedDecl *ND) {
mangleUnqualifiedName(ND, ND->getDeclName());
}
void mangleUnqualifiedName(const NamedDecl *ND, DeclarationName Name);
void mangleSourceName(StringRef Name);
void mangleOperatorName(OverloadedOperatorKind OO, SourceLocation Loc);
void mangleCXXDtorType(CXXDtorType T);
void mangleQualifiers(Qualifiers Quals, bool IsMember);
void mangleRefQualifier(RefQualifierKind RefQualifier);
void manglePointerCVQualifiers(Qualifiers Quals);
void manglePointerExtQualifiers(Qualifiers Quals, QualType PointeeType);
void mangleUnscopedTemplateName(const TemplateDecl *ND);
void
mangleTemplateInstantiationName(const TemplateDecl *TD,
const TemplateArgumentList &TemplateArgs);
void mangleObjCMethodName(const ObjCMethodDecl *MD);
void mangleArgumentType(QualType T, SourceRange Range);
void manglePassObjectSizeArg(const PassObjectSizeAttr *POSA);
bool isArtificialTagType(QualType T) const;
// Declare manglers for every type class.
#define ABSTRACT_TYPE(CLASS, PARENT)
#define NON_CANONICAL_TYPE(CLASS, PARENT)
#define TYPE(CLASS, PARENT) void mangleType(const CLASS##Type *T, \
Qualifiers Quals, \
SourceRange Range);
#include "clang/AST/TypeNodes.def"
#undef ABSTRACT_TYPE
#undef NON_CANONICAL_TYPE
#undef TYPE
void mangleType(const TagDecl *TD);
void mangleDecayedArrayType(const ArrayType *T);
void mangleArrayType(const ArrayType *T);
void mangleFunctionClass(const FunctionDecl *FD);
void mangleCallingConvention(CallingConv CC);
void mangleCallingConvention(const FunctionType *T);
void mangleIntegerLiteral(const llvm::APSInt &Number, bool IsBoolean);
void mangleExpression(const Expr *E);
void mangleThrowSpecification(const FunctionProtoType *T);
void mangleTemplateArgs(const TemplateDecl *TD,
const TemplateArgumentList &TemplateArgs);
void mangleTemplateArg(const TemplateDecl *TD, const TemplateArgument &TA,
const NamedDecl *Parm);
void mangleObjCProtocol(const ObjCProtocolDecl *PD);
void mangleObjCLifetime(const QualType T, Qualifiers Quals,
SourceRange Range);
void mangleObjCKindOfType(const ObjCObjectType *T, Qualifiers Quals,
SourceRange Range);
};
}
MicrosoftMangleContextImpl::MicrosoftMangleContextImpl(ASTContext &Context,
DiagnosticsEngine &Diags)
: MicrosoftMangleContext(Context, Diags) {
// To mangle anonymous namespaces, hash the path to the main source file. The
// path should be whatever (probably relative) path was passed on the command
// line. The goal is for the compiler to produce the same output regardless of
// working directory, so use the uncanonicalized relative path.
//
// It's important to make the mangled names unique because, when CodeView
// debug info is in use, the debugger uses mangled type names to distinguish
// between otherwise identically named types in anonymous namespaces.
//
// These symbols are always internal, so there is no need for the hash to
// match what MSVC produces. For the same reason, clang is free to change the
// hash at any time without breaking compatibility with old versions of clang.
// The generated names are intended to look similar to what MSVC generates,
// which are something like "?A0x01234567@".
SourceManager &SM = Context.getSourceManager();
if (const FileEntry *FE = SM.getFileEntryForID(SM.getMainFileID())) {
// Truncate the hash so we get 8 characters of hexadecimal.
uint32_t TruncatedHash = uint32_t(xxHash64(FE->getName()));
AnonymousNamespaceHash = llvm::utohexstr(TruncatedHash);
} else {
// If we don't have a path to the main file, we'll just use 0.
AnonymousNamespaceHash = "0";
}
}
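// Illustrative example (editorial, with a hypothetical path): a TU compiled
// as "clang -c src/main.cpp" hashes the literal string "src/main.cpp" with
// xxHash64, truncates the result to its low 32 bits, and prints it in hex,
// so the anonymous namespace mangles as "?A0x<up to 8 hex digits>@";
// invoking "clang -c ./src/main.cpp" instead would hash a different string
// and therefore produce a different tag.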
bool MicrosoftMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) {
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
LanguageLinkage L = FD->getLanguageLinkage();
// Overloadable functions need mangling.
if (FD->hasAttr<OverloadableAttr>())
return true;
// The ABI expects that we would never mangle "typical" user-defined entry
// points regardless of visibility or freestanding-ness.
//
// N.B. This is distinct from asking about "main". "main" has a lot of
// special rules associated with it in the standard while these
// user-defined entry points are outside of the purview of the standard.
// For example, there can be only one definition for "main" in a standards
// compliant program; however nothing forbids the existence of wmain and
// WinMain in the same translation unit.
if (FD->isMSVCRTEntryPoint())
return false;
// C++ functions and those whose names are not a simple identifier need
// mangling.
if (!FD->getDeclName().isIdentifier() || L == CXXLanguageLinkage)
return true;
// C functions are not mangled.
if (L == CLanguageLinkage)
return false;
}
// Otherwise, no mangling is done outside C++ mode.
if (!getASTContext().getLangOpts().CPlusPlus)
return false;
const VarDecl *VD = dyn_cast<VarDecl>(D);
if (VD && !isa<DecompositionDecl>(D)) {
// C variables are not mangled.
if (VD->isExternC())
return false;
// Variables at global scope with non-internal linkage are not mangled.
const DeclContext *DC = getEffectiveDeclContext(D);
// Check for extern variable declared locally.
if (DC->isFunctionOrMethod() && D->hasLinkage())
while (!DC->isNamespace() && !DC->isTranslationUnit())
DC = getEffectiveParentContext(DC);
if (DC->isTranslationUnit() && D->getFormalLinkage() == InternalLinkage &&
!isa<VarTemplateSpecializationDecl>(D) &&
D->getIdentifier() != nullptr)
return false;
}
return true;
}
bool
MicrosoftMangleContextImpl::shouldMangleStringLiteral(const StringLiteral *SL) {
return true;
}
void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) {
// MSVC doesn't mangle C++ names the same way it mangles extern "C" names.
// Therefore it's really important that we don't decorate the
// name with leading underscores or leading/trailing at signs. So, by
// default, we emit an asm marker at the start so we get the name right.
// Callers can override this with a custom prefix.
// <mangled-name> ::= ? <name> <type-encoding>
Out << Prefix;
mangleName(D);
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
mangleFunctionEncoding(FD, Context.shouldMangleDeclName(FD));
else if (const VarDecl *VD = dyn_cast<VarDecl>(D))
mangleVariableEncoding(VD);
else
llvm_unreachable("Tried to mangle unexpected NamedDecl!");
}
void MicrosoftCXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD,
bool ShouldMangle) {
// <type-encoding> ::= <function-class> <function-type>
// Since MSVC operates on the type as written and not the canonical type, it
// actually matters which decl we have here. MSVC appears to choose the
// first, since it is most likely to be the declaration in a header file.
FD = FD->getFirstDecl();
// We should never ever see a FunctionNoProtoType at this point.
// We don't even know how to mangle their types anyway :).
const FunctionProtoType *FT = FD->getType()->castAs<FunctionProtoType>();
// extern "C" functions can hold entities that must be mangled.
// As it stands, these functions still need to get expressed in the full
// external name. They have their class and type omitted, replaced with '9'.
if (ShouldMangle) {
// We would like to mangle all extern "C" functions using this additional
// component but this would break compatibility with MSVC's behavior.
// Instead, do this when we know that compatibility isn't important (in
// other words, when it is an overloaded extern "C" function).
if (FD->isExternC() && FD->hasAttr<OverloadableAttr>())
Out << "$$J0";
mangleFunctionClass(FD);
mangleFunctionType(FT, FD, false, false);
} else {
Out << '9';
}
}
void MicrosoftCXXNameMangler::mangleVariableEncoding(const VarDecl *VD) {
// <type-encoding> ::= <storage-class> <variable-type>
// <storage-class> ::= 0 # private static member
// ::= 1 # protected static member
// ::= 2 # public static member
// ::= 3 # global
// ::= 4 # static local
// The first character in the encoding (after the name) is the storage class.
if (VD->isStaticDataMember()) {
// If it's a static member, it also encodes the access level.
switch (VD->getAccess()) {
default:
case AS_private: Out << '0'; break;
case AS_protected: Out << '1'; break;
case AS_public: Out << '2'; break;
}
}
else if (!VD->isStaticLocal())
Out << '3';
else
Out << '4';
// Now mangle the type.
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
// Pointers and references are odd. The type of 'int * const foo;' gets
// mangled as 'QAHA' instead of 'PAHB', for example.
SourceRange SR = VD->getSourceRange();
QualType Ty = VD->getType();
if (Ty->isPointerType() || Ty->isReferenceType() ||
Ty->isMemberPointerType()) {
mangleType(Ty, SR, QMM_Drop);
manglePointerExtQualifiers(
Ty.getDesugaredType(getASTContext()).getLocalQualifiers(), QualType());
if (const MemberPointerType *MPT = Ty->getAs<MemberPointerType>()) {
mangleQualifiers(MPT->getPointeeType().getQualifiers(), true);
// Member pointers are suffixed with a back reference to the member
// pointer's class name.
mangleName(MPT->getClass()->getAsCXXRecordDecl());
} else
mangleQualifiers(Ty->getPointeeType().getQualifiers(), false);
} else if (const ArrayType *AT = getASTContext().getAsArrayType(Ty)) {
// Global arrays are funny, too.
mangleDecayedArrayType(AT);
if (AT->getElementType()->isArrayType())
Out << 'A';
else
mangleQualifiers(Ty.getQualifiers(), false);
} else {
mangleType(Ty, SR, QMM_Drop);
mangleQualifiers(Ty.getQualifiers(), false);
}
}
void MicrosoftCXXNameMangler::mangleMemberDataPointer(const CXXRecordDecl *RD,
const ValueDecl *VD) {
// <member-data-pointer> ::= <integer-literal>
// ::= $F <number> <number>
// ::= $G <number> <number> <number>
int64_t FieldOffset;
int64_t VBTableOffset;
MSInheritanceAttr::Spelling IM = RD->getMSInheritanceModel();
if (VD) {
FieldOffset = getASTContext().getFieldOffset(VD);
assert(FieldOffset % getASTContext().getCharWidth() == 0 &&
"cannot take address of bitfield");
FieldOffset /= getASTContext().getCharWidth();
VBTableOffset = 0;
if (IM == MSInheritanceAttr::Keyword_virtual_inheritance)
FieldOffset -= getASTContext().getOffsetOfBaseWithVBPtr(RD).getQuantity();
} else {
FieldOffset = RD->nullFieldOffsetIsZero() ? 0 : -1;
VBTableOffset = -1;
}
char Code = '\0';
switch (IM) {
case MSInheritanceAttr::Keyword_single_inheritance: Code = '0'; break;
case MSInheritanceAttr::Keyword_multiple_inheritance: Code = '0'; break;
case MSInheritanceAttr::Keyword_virtual_inheritance: Code = 'F'; break;
case MSInheritanceAttr::Keyword_unspecified_inheritance: Code = 'G'; break;
}
Out << '$' << Code;
mangleNumber(FieldOffset);
// The C++ standard doesn't allow base-to-derived member pointer conversions
// in template parameter contexts, so the vbptr offset of data member pointers
// is always zero.
if (MSInheritanceAttr::hasVBPtrOffsetField(IM))
mangleNumber(0);
if (MSInheritanceAttr::hasVBTableOffsetField(IM))
mangleNumber(VBTableOffset);
}
void
MicrosoftCXXNameMangler::mangleMemberFunctionPointer(const CXXRecordDecl *RD,
const CXXMethodDecl *MD) {
// <member-function-pointer> ::= $1? <name>
// ::= $H? <name> <number>
// ::= $I? <name> <number> <number>
// ::= $J? <name> <number> <number> <number>
MSInheritanceAttr::Spelling IM = RD->getMSInheritanceModel();
char Code = '\0';
switch (IM) {
case MSInheritanceAttr::Keyword_single_inheritance: Code = '1'; break;
case MSInheritanceAttr::Keyword_multiple_inheritance: Code = 'H'; break;
case MSInheritanceAttr::Keyword_virtual_inheritance: Code = 'I'; break;
case MSInheritanceAttr::Keyword_unspecified_inheritance: Code = 'J'; break;
}
// If non-virtual, mangle the name. If virtual, mangle as a virtual memptr
// thunk.
uint64_t NVOffset = 0;
uint64_t VBTableOffset = 0;
uint64_t VBPtrOffset = 0;
if (MD) {
Out << '$' << Code << '?';
if (MD->isVirtual()) {
MicrosoftVTableContext *VTContext =
cast<MicrosoftVTableContext>(getASTContext().getVTableContext());
MethodVFTableLocation ML =
VTContext->getMethodVFTableLocation(GlobalDecl(MD));
mangleVirtualMemPtrThunk(MD, ML);
NVOffset = ML.VFPtrOffset.getQuantity();
VBTableOffset = ML.VBTableIndex * 4;
if (ML.VBase) {
const ASTRecordLayout &Layout = getASTContext().getASTRecordLayout(RD);
VBPtrOffset = Layout.getVBPtrOffset().getQuantity();
}
} else {
mangleName(MD);
mangleFunctionEncoding(MD, /*ShouldMangle=*/true);
}
if (VBTableOffset == 0 &&
IM == MSInheritanceAttr::Keyword_virtual_inheritance)
NVOffset -= getASTContext().getOffsetOfBaseWithVBPtr(RD).getQuantity();
} else {
// Null single inheritance member functions are encoded as a simple nullptr.
if (IM == MSInheritanceAttr::Keyword_single_inheritance) {
Out << "$0A@";
return;
}
if (IM == MSInheritanceAttr::Keyword_unspecified_inheritance)
VBTableOffset = -1;
Out << '$' << Code;
}
if (MSInheritanceAttr::hasNVOffsetField(/*IsMemberFunction=*/true, IM))
mangleNumber(static_cast<uint32_t>(NVOffset));
if (MSInheritanceAttr::hasVBPtrOffsetField(IM))
mangleNumber(VBPtrOffset);
if (MSInheritanceAttr::hasVBTableOffsetField(IM))
mangleNumber(VBTableOffset);
}
void MicrosoftCXXNameMangler::mangleVirtualMemPtrThunk(
const CXXMethodDecl *MD, const MethodVFTableLocation &ML) {
// Get the vftable offset.
CharUnits PointerWidth = getASTContext().toCharUnitsFromBits(
getASTContext().getTargetInfo().getPointerWidth(0));
uint64_t OffsetInVFTable = ML.Index * PointerWidth.getQuantity();
Out << "?_9";
mangleName(MD->getParent());
Out << "$B";
mangleNumber(OffsetInVFTable);
Out << 'A';
mangleCallingConvention(MD->getType()->getAs<FunctionProtoType>());
}
void MicrosoftCXXNameMangler::mangleName(const NamedDecl *ND) {
// <name> ::= <unscoped-name> {[<named-scope>]+ | [<nested-name>]}? @
// Always start with the unqualified name.
mangleUnqualifiedName(ND);
mangleNestedName(ND);
// Terminate the whole name with an '@'.
Out << '@';
}
void MicrosoftCXXNameMangler::mangleNumber(int64_t Number) {
// <non-negative integer> ::= A@ # when Number == 0
// ::= <decimal digit> # when 1 <= Number <= 10
// ::= <hex digit>+ @ # when Number > 10
//
// <number> ::= [?] <non-negative integer>
uint64_t Value = static_cast<uint64_t>(Number);
if (Number < 0) {
Value = -Value;
Out << '?';
}
if (Value == 0)
Out << "A@";
else if (Value >= 1 && Value <= 10)
Out << (Value - 1);
else {
// Numbers that are not encoded as decimal digits are represented as nibbles
// in the range of ASCII characters 'A' to 'P'.
// The number 0x123450 would be encoded as 'BCDEFA'
char EncodedNumberBuffer[sizeof(uint64_t) * 2];
MutableArrayRef<char> BufferRef(EncodedNumberBuffer);
MutableArrayRef<char>::reverse_iterator I = BufferRef.rbegin();
for (; Value != 0; Value >>= 4)
*I++ = 'A' + (Value & 0xf);
Out.write(I.base(), I - BufferRef.rbegin());
Out << '@';
}
}
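// Worked examples of the encoding above (editorial illustration, derived
// from the code): 0 mangles to "A@", 1 to "0", 10 to "9", 11 to "L@" (a
// single nibble, 'A' + 0xB), -5 to "?4", and 0x123450 to "BCDEFA@",
// including the terminating '@'.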
static const TemplateDecl *
isTemplate(const NamedDecl *ND, const TemplateArgumentList *&TemplateArgs) {
// Check if we have a function template.
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
if (const TemplateDecl *TD = FD->getPrimaryTemplate()) {
TemplateArgs = FD->getTemplateSpecializationArgs();
return TD;
}
}
// Check if we have a class template.
if (const ClassTemplateSpecializationDecl *Spec =
dyn_cast<ClassTemplateSpecializationDecl>(ND)) {
TemplateArgs = &Spec->getTemplateArgs();
return Spec->getSpecializedTemplate();
}
// Check if we have a variable template.
if (const VarTemplateSpecializationDecl *Spec =
dyn_cast<VarTemplateSpecializationDecl>(ND)) {
TemplateArgs = &Spec->getTemplateArgs();
return Spec->getSpecializedTemplate();
}
return nullptr;
}
void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
DeclarationName Name) {
// <unqualified-name> ::= <operator-name>
// ::= <ctor-dtor-name>
// ::= <source-name>
// ::= <template-name>
// Check if we have a template.
const TemplateArgumentList *TemplateArgs = nullptr;
if (const TemplateDecl *TD = isTemplate(ND, TemplateArgs)) {
// Function templates aren't considered for name back referencing. This
// makes sense since function templates aren't likely to occur multiple
// times in a symbol.
if (isa<FunctionTemplateDecl>(TD)) {
mangleTemplateInstantiationName(TD, *TemplateArgs);
Out << '@';
return;
}
// Here comes the tricky thing: if we need to mangle something like
// void foo(A::X<Y>, B::X<Y>),
// the X<Y> part is aliased. However, if you need to mangle
// void foo(A::X<A::Y>, A::X<B::Y>),
// the A::X<> part is not aliased.
// That said, from the mangler's perspective we have a structure like this:
// namespace[s] -> type[ -> template-parameters]
// but from the Clang perspective we have
// type [ -> template-parameters]
// \-> namespace[s]
// What we do is we create a new mangler, mangle the same type (without
// a namespace suffix) to a string using the extra mangler and then use
// the mangled type name as a key to check the mangling of different types
// for aliasing.
llvm::SmallString<64> TemplateMangling;
llvm::raw_svector_ostream Stream(TemplateMangling);
MicrosoftCXXNameMangler Extra(Context, Stream);
Extra.mangleTemplateInstantiationName(TD, *TemplateArgs);
mangleSourceName(TemplateMangling);
return;
}
switch (Name.getNameKind()) {
case DeclarationName::Identifier: {
if (const IdentifierInfo *II = Name.getAsIdentifierInfo()) {
mangleSourceName(II->getName());
break;
}
// Otherwise, an anonymous entity. We must have a declaration.
assert(ND && "mangling empty name without declaration");
if (const NamespaceDecl *NS = dyn_cast<NamespaceDecl>(ND)) {
if (NS->isAnonymousNamespace()) {
Out << "?A0x" << Context.getAnonymousNamespaceHash() << '@';
break;
}
}
if (const DecompositionDecl *DD = dyn_cast<DecompositionDecl>(ND)) {
// FIXME: Invented mangling for decomposition declarations:
// [X,Y,Z]
// where X,Y,Z are the names of the bindings.
llvm::SmallString<128> Name("[");
for (auto *BD : DD->bindings()) {
if (Name.size() > 1)
Name += ',';
Name += BD->getDeclName().getAsIdentifierInfo()->getName();
}
Name += ']';
mangleSourceName(Name);
break;
}
if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) {
// We must have an anonymous union or struct declaration.
const CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl();
assert(RD && "expected variable decl to have a record type");
// Anonymous types with no tag or typedef get the name of their
// declarator mangled in. If they have no declarator, number them with
// a $S prefix.
llvm::SmallString<64> Name("$S");
// Get a unique id for the anonymous struct.
Name += llvm::utostr(Context.getAnonymousStructId(RD) + 1);
mangleSourceName(Name.str());
break;
}
// We must have an anonymous struct.
const TagDecl *TD = cast<TagDecl>(ND);
if (const TypedefNameDecl *D = TD->getTypedefNameForAnonDecl()) {
assert(TD->getDeclContext() == D->getDeclContext() &&
"Typedef should not be in another decl context!");
assert(D->getDeclName().getAsIdentifierInfo() &&
"Typedef was not named!");
mangleSourceName(D->getDeclName().getAsIdentifierInfo()->getName());
break;
}
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(TD)) {
if (Record->isLambda()) {
llvm::SmallString<10> Name("<lambda_");
Decl *LambdaContextDecl = Record->getLambdaContextDecl();
unsigned LambdaManglingNumber = Record->getLambdaManglingNumber();
unsigned LambdaId;
const ParmVarDecl *Parm =
dyn_cast_or_null<ParmVarDecl>(LambdaContextDecl);
const FunctionDecl *Func =
Parm ? dyn_cast<FunctionDecl>(Parm->getDeclContext()) : nullptr;
if (Func) {
unsigned DefaultArgNo =
Func->getNumParams() - Parm->getFunctionScopeIndex();
Name += llvm::utostr(DefaultArgNo);
Name += "_";
}
if (LambdaManglingNumber)
LambdaId = LambdaManglingNumber;
else
LambdaId = Context.getLambdaId(Record);
Name += llvm::utostr(LambdaId);
Name += ">";
mangleSourceName(Name);
// If the context of a closure type is an initializer for a class
// member (static or nonstatic), it is encoded in a qualified name.
if (LambdaManglingNumber && LambdaContextDecl) {
if ((isa<VarDecl>(LambdaContextDecl) ||
isa<FieldDecl>(LambdaContextDecl)) &&
LambdaContextDecl->getDeclContext()->isRecord()) {
mangleUnqualifiedName(cast<NamedDecl>(LambdaContextDecl));
}
}
break;
}
}
llvm::SmallString<64> Name;
if (DeclaratorDecl *DD =
Context.getASTContext().getDeclaratorForUnnamedTagDecl(TD)) {
// Anonymous types without a name for linkage purposes have their
// declarator mangled in if they have one.
Name += "<unnamed-type-";
Name += DD->getName();
} else if (TypedefNameDecl *TND =
Context.getASTContext().getTypedefNameForUnnamedTagDecl(
TD)) {
// Anonymous types without a name for linkage purposes have their
// associate typedef mangled in if they have one.
Name += "<unnamed-type-";
Name += TND->getName();
} else if (isa<EnumDecl>(TD) &&
cast<EnumDecl>(TD)->enumerator_begin() !=
cast<EnumDecl>(TD)->enumerator_end()) {
// Anonymous non-empty enums mangle in the first enumerator.
auto *ED = cast<EnumDecl>(TD);
Name += "<unnamed-enum-";
Name += ED->enumerator_begin()->getName();
} else {
// Otherwise, number the types using a $S prefix.
Name += "<unnamed-type-$S";
Name += llvm::utostr(Context.getAnonymousStructId(TD) + 1);
}
Name += ">";
mangleSourceName(Name.str());
break;
}
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector: {
// This is reachable only when constructing an outlined SEH finally
// block. Nothing depends on this mangling and it's used only with
// functions with internal linkage.
llvm::SmallString<64> Name;
mangleSourceName(Name.str());
break;
}
case DeclarationName::CXXConstructorName:
if (isStructorDecl(ND)) {
if (StructorType == Ctor_CopyingClosure) {
Out << "?_O";
return;
}
if (StructorType == Ctor_DefaultClosure) {
Out << "?_F";
return;
}
}
Out << "?0";
return;
case DeclarationName::CXXDestructorName:
if (isStructorDecl(ND))
// If the named decl is the C++ destructor we're mangling,
// use the type we were given.
mangleCXXDtorType(static_cast<CXXDtorType>(StructorType));
else
// Otherwise, use the base destructor name. This is relevant if a
// class with a destructor is declared within a destructor.
mangleCXXDtorType(Dtor_Base);
break;
case DeclarationName::CXXConversionFunctionName:
// <operator-name> ::= ?B # (cast)
// The target type is encoded as the return type.
Out << "?B";
break;
case DeclarationName::CXXOperatorName:
mangleOperatorName(Name.getCXXOverloadedOperator(), ND->getLocation());
break;
case DeclarationName::CXXLiteralOperatorName: {
Out << "?__K";
mangleSourceName(Name.getCXXLiteralIdentifier()->getName());
break;
}
case DeclarationName::CXXDeductionGuideName:
llvm_unreachable("Can't mangle a deduction guide name!");
case DeclarationName::CXXUsingDirective:
llvm_unreachable("Can't mangle a using directive name!");
}
}
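// Illustrative sketch of the lambda naming above: a lambda with mangling
// number N appearing directly in a function body is named "<lambda_N>",
// while a lambda in a parameter's default argument also encodes the
// parameter position counted from the end, so a lambda in the last
// parameter's default argument becomes "<lambda_1_N>".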
// <postfix> ::= <unqualified-name> [<postfix>]
// ::= <substitution> [<postfix>]
void MicrosoftCXXNameMangler::mangleNestedName(const NamedDecl *ND) {
const DeclContext *DC = getEffectiveDeclContext(ND);
while (!DC->isTranslationUnit()) {
if (isa<TagDecl>(ND) || isa<VarDecl>(ND)) {
unsigned Disc;
if (Context.getNextDiscriminator(ND, Disc)) {
Out << '?';
mangleNumber(Disc);
Out << '?';
}
}
if (const BlockDecl *BD = dyn_cast<BlockDecl>(DC)) {
auto Discriminate =
[](StringRef Name, const unsigned Discriminator,
const unsigned ParameterDiscriminator) -> std::string {
std::string Buffer;
llvm::raw_string_ostream Stream(Buffer);
Stream << Name;
if (Discriminator)
Stream << '_' << Discriminator;
if (ParameterDiscriminator)
Stream << '_' << ParameterDiscriminator;
return Stream.str();
};
unsigned Discriminator = BD->getBlockManglingNumber();
if (!Discriminator)
Discriminator = Context.getBlockId(BD, /*Local=*/false);
// Mangle the parameter position as a discriminator to deal with unnamed
// parameters. Rather than mangling the unqualified parameter name,
// always use the position to give a uniform mangling.
unsigned ParameterDiscriminator = 0;
if (const auto *MC = BD->getBlockManglingContextDecl())
if (const auto *P = dyn_cast<ParmVarDecl>(MC))
if (const auto *F = dyn_cast<FunctionDecl>(P->getDeclContext()))
ParameterDiscriminator =
F->getNumParams() - P->getFunctionScopeIndex();
DC = getEffectiveDeclContext(BD);
Out << '?';
mangleSourceName(Discriminate("_block_invoke", Discriminator,
ParameterDiscriminator));
// If we have a block mangling context, encode that now. This allows us
// to discriminate between named static data initializers in the same
// scope. This is handled differently from parameters, which use
// positions to discriminate between multiple instances.
if (const auto *MC = BD->getBlockManglingContextDecl())
if (!isa<ParmVarDecl>(MC))
if (const auto *ND = dyn_cast<NamedDecl>(MC))
mangleUnqualifiedName(ND);
// MS ABI and Itanium manglings are in inverted scopes. In the case of a
// RecordDecl, mangle the entire scope hierarchy at this point rather than
// just the unqualified name to get the ordering correct.
if (const auto *RD = dyn_cast<RecordDecl>(DC))
mangleName(RD);
else
Out << '@';
// void __cdecl
Out << "YAX";
// struct __block_literal *
Out << 'P';
// __ptr64
if (PointersAre64Bit)
Out << 'E';
Out << 'A';
mangleArtificialTagType(TTK_Struct,
Discriminate("__block_literal", Discriminator,
ParameterDiscriminator));
Out << "@Z";
// If the effective context was a Record, we have fully mangled the
// qualified name and do not need to continue.
if (isa<RecordDecl>(DC))
break;
continue;
} else if (const ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(DC)) {
mangleObjCMethodName(Method);
} else if (isa<NamedDecl>(DC)) {
ND = cast<NamedDecl>(DC);
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
mangle(FD, "?");
break;
} else {
mangleUnqualifiedName(ND);
// Lambdas in default arguments conceptually belong to the function the
// parameter corresponds to.
if (const auto *LDADC = getLambdaDefaultArgumentDeclContext(ND)) {
DC = LDADC;
continue;
}
}
}
DC = DC->getParent();
}
}
void MicrosoftCXXNameMangler::mangleCXXDtorType(CXXDtorType T) {
// Microsoft uses the names on the case labels for these dtor variants. Clang
// uses the Itanium terminology internally. Everything in this ABI delegates
// towards the base dtor.
switch (T) {
// <operator-name> ::= ?1 # destructor
case Dtor_Base: Out << "?1"; return;
// <operator-name> ::= ?_D # vbase destructor
case Dtor_Complete: Out << "?_D"; return;
// <operator-name> ::= ?_G # scalar deleting destructor
case Dtor_Deleting: Out << "?_G"; return;
// <operator-name> ::= ?_E # vector deleting destructor
// FIXME: Add a vector deleting dtor type. It goes in the vtable, so we need
// it.
case Dtor_Comdat:
llvm_unreachable("not expecting a COMDAT");
}
llvm_unreachable("Unsupported dtor type?");
}
void MicrosoftCXXNameMangler::mangleOperatorName(OverloadedOperatorKind OO,
SourceLocation Loc) {
switch (OO) {
// ?0 # constructor
// ?1 # destructor
// <operator-name> ::= ?2 # new
case OO_New: Out << "?2"; break;
// <operator-name> ::= ?3 # delete
case OO_Delete: Out << "?3"; break;
// <operator-name> ::= ?4 # =
case OO_Equal: Out << "?4"; break;
// <operator-name> ::= ?5 # >>
case OO_GreaterGreater: Out << "?5"; break;
// <operator-name> ::= ?6 # <<
case OO_LessLess: Out << "?6"; break;
// <operator-name> ::= ?7 # !
case OO_Exclaim: Out << "?7"; break;
// <operator-name> ::= ?8 # ==
case OO_EqualEqual: Out << "?8"; break;
// <operator-name> ::= ?9 # !=
case OO_ExclaimEqual: Out << "?9"; break;
// <operator-name> ::= ?A # []
case OO_Subscript: Out << "?A"; break;
// ?B # conversion
// <operator-name> ::= ?C # ->
case OO_Arrow: Out << "?C"; break;
// <operator-name> ::= ?D # *
case OO_Star: Out << "?D"; break;
// <operator-name> ::= ?E # ++
case OO_PlusPlus: Out << "?E"; break;
// <operator-name> ::= ?F # --
case OO_MinusMinus: Out << "?F"; break;
// <operator-name> ::= ?G # -
case OO_Minus: Out << "?G"; break;
// <operator-name> ::= ?H # +
case OO_Plus: Out << "?H"; break;
// <operator-name> ::= ?I # &
case OO_Amp: Out << "?I"; break;
// <operator-name> ::= ?J # ->*
case OO_ArrowStar: Out << "?J"; break;
// <operator-name> ::= ?K # /
case OO_Slash: Out << "?K"; break;
// <operator-name> ::= ?L # %
case OO_Percent: Out << "?L"; break;
// <operator-name> ::= ?M # <
case OO_Less: Out << "?M"; break;
// <operator-name> ::= ?N # <=
case OO_LessEqual: Out << "?N"; break;
// <operator-name> ::= ?O # >
case OO_Greater: Out << "?O"; break;
// <operator-name> ::= ?P # >=
case OO_GreaterEqual: Out << "?P"; break;
// <operator-name> ::= ?Q # ,
case OO_Comma: Out << "?Q"; break;
// <operator-name> ::= ?R # ()
case OO_Call: Out << "?R"; break;
// <operator-name> ::= ?S # ~
case OO_Tilde: Out << "?S"; break;
// <operator-name> ::= ?T # ^
case OO_Caret: Out << "?T"; break;
// <operator-name> ::= ?U # |
case OO_Pipe: Out << "?U"; break;
// <operator-name> ::= ?V # &&
case OO_AmpAmp: Out << "?V"; break;
// <operator-name> ::= ?W # ||
case OO_PipePipe: Out << "?W"; break;
// <operator-name> ::= ?X # *=
case OO_StarEqual: Out << "?X"; break;
// <operator-name> ::= ?Y # +=
case OO_PlusEqual: Out << "?Y"; break;
// <operator-name> ::= ?Z # -=
case OO_MinusEqual: Out << "?Z"; break;
// <operator-name> ::= ?_0 # /=
case OO_SlashEqual: Out << "?_0"; break;
// <operator-name> ::= ?_1 # %=
case OO_PercentEqual: Out << "?_1"; break;
// <operator-name> ::= ?_2 # >>=
case OO_GreaterGreaterEqual: Out << "?_2"; break;
// <operator-name> ::= ?_3 # <<=
case OO_LessLessEqual: Out << "?_3"; break;
// <operator-name> ::= ?_4 # &=
case OO_AmpEqual: Out << "?_4"; break;
// <operator-name> ::= ?_5 # |=
case OO_PipeEqual: Out << "?_5"; break;
// <operator-name> ::= ?_6 # ^=
case OO_CaretEqual: Out << "?_6"; break;
// ?_7 # vftable
// ?_8 # vbtable
// ?_9 # vcall
// ?_A # typeof
// ?_B # local static guard
// ?_C # string
// ?_D # vbase destructor
// ?_E # vector deleting destructor
// ?_F # default constructor closure
// ?_G # scalar deleting destructor
// ?_H # vector constructor iterator
// ?_I # vector destructor iterator
// ?_J # vector vbase constructor iterator
// ?_K # virtual displacement map
// ?_L # eh vector constructor iterator
// ?_M # eh vector destructor iterator
// ?_N # eh vector vbase constructor iterator
// ?_O # copy constructor closure
// ?_P<name> # udt returning <name>
// ?_Q # <unknown>
// ?_R0 # RTTI Type Descriptor
// ?_R1 # RTTI Base Class Descriptor at (a,b,c,d)
// ?_R2 # RTTI Base Class Array
// ?_R3 # RTTI Class Hierarchy Descriptor
// ?_R4 # RTTI Complete Object Locator
// ?_S # local vftable
// ?_T # local vftable constructor closure
// <operator-name> ::= ?_U # new[]
case OO_Array_New: Out << "?_U"; break;
// <operator-name> ::= ?_V # delete[]
case OO_Array_Delete: Out << "?_V"; break;
// <operator-name> ::= ?__L # co_await
case OO_Coawait: Out << "?__L"; break;
case OO_Spaceship: {
// FIXME: Once MS picks a mangling, use it.
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this three-way comparison operator yet");
Diags.Report(Loc, DiagID);
break;
}
case OO_Conditional: {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this conditional operator yet");
Diags.Report(Loc, DiagID);
break;
}
case OO_None:
case NUM_OVERLOADED_OPERATORS:
llvm_unreachable("Not an overloaded operator");
}
}
void MicrosoftCXXNameMangler::mangleSourceName(StringRef Name) {
// <source name> ::= <identifier> @
BackRefVec::iterator Found =
std::find(NameBackReferences.begin(), NameBackReferences.end(), Name);
if (Found == NameBackReferences.end()) {
if (NameBackReferences.size() < 10)
NameBackReferences.push_back(Name);
Out << Name << '@';
} else {
Out << (Found - NameBackReferences.begin());
}
}
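// Illustrative example: the first ten distinct identifiers seen are
// recorded, and any repeat is emitted as a single digit indexing that
// table. For an "int x" inside a namespace N nested in another namespace
// N, "x" takes slot 0 and the inner "N" slot 1, so the outer "N" is
// emitted as "1", giving roughly "?x@N@1@3HA".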
void MicrosoftCXXNameMangler::mangleObjCMethodName(const ObjCMethodDecl *MD) {
Context.mangleObjCMethodName(MD, Out);
}
void MicrosoftCXXNameMangler::mangleTemplateInstantiationName(
const TemplateDecl *TD, const TemplateArgumentList &TemplateArgs) {
// <template-name> ::= <unscoped-template-name> <template-args>
// ::= <substitution>
// Always start with the unqualified name.
// Templates have their own context for back references.
ArgBackRefMap OuterArgsContext;
BackRefVec OuterTemplateContext;
PassObjectSizeArgsSet OuterPassObjectSizeArgs;
NameBackReferences.swap(OuterTemplateContext);
TypeBackReferences.swap(OuterArgsContext);
PassObjectSizeArgs.swap(OuterPassObjectSizeArgs);
mangleUnscopedTemplateName(TD);
mangleTemplateArgs(TD, TemplateArgs);
// Restore the previous back reference contexts.
NameBackReferences.swap(OuterTemplateContext);
TypeBackReferences.swap(OuterArgsContext);
PassObjectSizeArgs.swap(OuterPassObjectSizeArgs);
}
void
MicrosoftCXXNameMangler::mangleUnscopedTemplateName(const TemplateDecl *TD) {
// <unscoped-template-name> ::= ?$ <unqualified-name>
Out << "?$";
mangleUnqualifiedName(TD);
}
void MicrosoftCXXNameMangler::mangleIntegerLiteral(const llvm::APSInt &Value,
bool IsBoolean) {
// <integer-literal> ::= $0 <number>
Out << "$0";
// Make sure booleans are encoded as 0/1.
if (IsBoolean && Value.getBoolValue())
mangleNumber(1);
else if (Value.isSigned())
mangleNumber(Value.getSExtValue());
else
mangleNumber(Value.getZExtValue());
}
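// Illustrative examples, assuming the <number> encoding used by
// mangleNumber(): a non-type argument of 5 becomes "$04" (values 1..10
// map onto the digits '0'..'9'), and a boolean "true" is normalized to 1
// and becomes "$00".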
void MicrosoftCXXNameMangler::mangleExpression(const Expr *E) {
// See if this is a constant expression.
llvm::APSInt Value;
if (E->isIntegerConstantExpr(Value, Context.getASTContext())) {
mangleIntegerLiteral(Value, E->getType()->isBooleanType());
return;
}
// Look through no-op casts like template parameter substitutions.
E = E->IgnoreParenNoopCasts(Context.getASTContext());
const CXXUuidofExpr *UE = nullptr;
if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
if (UO->getOpcode() == UO_AddrOf)
UE = dyn_cast<CXXUuidofExpr>(UO->getSubExpr());
} else
UE = dyn_cast<CXXUuidofExpr>(E);
if (UE) {
// If we had to peek through an address-of operator, treat this like we are
// dealing with a pointer type. Otherwise, treat it like a const reference.
//
// N.B. This matches up with the handling of TemplateArgument::Declaration
// in mangleTemplateArg
if (UE == E)
Out << "$E?";
else
Out << "$1?";
// This CXXUuidofExpr is mangled as if it were actually a VarDecl from
// const __s_GUID _GUID_{lower case UUID with underscores}
StringRef Uuid = UE->getUuidStr();
std::string Name = "_GUID_" + Uuid.lower();
std::replace(Name.begin(), Name.end(), '-', '_');
mangleSourceName(Name);
// Terminate the whole name with an '@'.
Out << '@';
// It's a global variable.
Out << '3';
// It's a struct called __s_GUID.
mangleArtificialTagType(TTK_Struct, "__s_GUID");
// It's const.
Out << 'B';
return;
}
// As bad as this diagnostic is, it's better than crashing.
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error, "cannot yet mangle expression type %0");
Diags.Report(E->getExprLoc(), DiagID) << E->getStmtClassName()
<< E->getSourceRange();
}
void MicrosoftCXXNameMangler::mangleTemplateArgs(
const TemplateDecl *TD, const TemplateArgumentList &TemplateArgs) {
// <template-args> ::= <template-arg>+
const TemplateParameterList *TPL = TD->getTemplateParameters();
assert(TPL->size() == TemplateArgs.size() &&
"size mismatch between args and parms!");
for (size_t i = 0; i < TemplateArgs.size(); ++i) {
const TemplateArgument &TA = TemplateArgs[i];
// Separate consecutive packs by $$Z.
if (i > 0 && TA.getKind() == TemplateArgument::Pack &&
TemplateArgs[i - 1].getKind() == TemplateArgument::Pack)
Out << "$$Z";
mangleTemplateArg(TD, TA, TPL->getParam(i));
}
}
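// Illustrative example: given a function template with two parameter
// packs As and Bs, an instantiation where As = <int> and Bs = <int> gets
// a "$$Z" between the packs, which keeps it distinct from the
// instantiation where As = <int, int> and Bs = <> (presumably why MSVC
// separates consecutive packs at all).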
void MicrosoftCXXNameMangler::mangleTemplateArg(const TemplateDecl *TD,
const TemplateArgument &TA,
const NamedDecl *Parm) {
// <template-arg> ::= <type>
// ::= <integer-literal>
// ::= <member-data-pointer>
// ::= <member-function-pointer>
// ::= $E? <name> <type-encoding>
// ::= $1? <name> <type-encoding>
// ::= $0A@
// ::= <template-args>
switch (TA.getKind()) {
case TemplateArgument::Null:
llvm_unreachable("Can't mangle null template arguments!");
case TemplateArgument::TemplateExpansion:
llvm_unreachable("Can't mangle template expansion arguments!");
case TemplateArgument::Type: {
QualType T = TA.getAsType();
mangleType(T, SourceRange(), QMM_Escape);
break;
}
case TemplateArgument::Declaration: {
const NamedDecl *ND = TA.getAsDecl();
if (isa<FieldDecl>(ND) || isa<IndirectFieldDecl>(ND)) {
mangleMemberDataPointer(cast<CXXRecordDecl>(ND->getDeclContext())
->getMostRecentNonInjectedDecl(),
cast<ValueDecl>(ND));
} else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
if (MD && MD->isInstance()) {
mangleMemberFunctionPointer(
MD->getParent()->getMostRecentNonInjectedDecl(), MD);
} else {
Out << "$1?";
mangleName(FD);
mangleFunctionEncoding(FD, /*ShouldMangle=*/true);
}
} else {
mangle(ND, TA.getParamTypeForDecl()->isReferenceType() ? "$E?" : "$1?");
}
break;
}
case TemplateArgument::Integral:
mangleIntegerLiteral(TA.getAsIntegral(),
TA.getIntegralType()->isBooleanType());
break;
case TemplateArgument::NullPtr: {
QualType T = TA.getNullPtrType();
if (const MemberPointerType *MPT = T->getAs<MemberPointerType>()) {
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
if (MPT->isMemberFunctionPointerType() &&
!isa<FunctionTemplateDecl>(TD)) {
mangleMemberFunctionPointer(RD, nullptr);
return;
}
if (MPT->isMemberDataPointer()) {
if (!isa<FunctionTemplateDecl>(TD)) {
mangleMemberDataPointer(RD, nullptr);
return;
}
// nullptr data pointers are always represented with a single field
// which is initialized with either 0 or -1. Why -1? Well, we need to
// distinguish the case where the data member is at offset zero in the
// record.
// However, we are free to use 0 *if* we would use multiple fields for
// non-nullptr member pointers.
if (!RD->nullFieldOffsetIsZero()) {
mangleIntegerLiteral(llvm::APSInt::get(-1), /*IsBoolean=*/false);
return;
}
}
}
mangleIntegerLiteral(llvm::APSInt::getUnsigned(0), /*IsBoolean=*/false);
break;
}
case TemplateArgument::Expression:
mangleExpression(TA.getAsExpr());
break;
case TemplateArgument::Pack: {
ArrayRef<TemplateArgument> TemplateArgs = TA.getPackAsArray();
if (TemplateArgs.empty()) {
if (isa<TemplateTypeParmDecl>(Parm) ||
isa<TemplateTemplateParmDecl>(Parm))
// MSVC 2015 changed the mangling for empty expanded template packs;
// use the old mangling for link compatibility with older versions.
Out << (Context.getASTContext().getLangOpts().isCompatibleWithMSVC(
LangOptions::MSVC2015)
? "$$V"
: "$$$V");
else if (isa<NonTypeTemplateParmDecl>(Parm))
Out << "$S";
else
llvm_unreachable("unexpected template parameter decl!");
} else {
for (const TemplateArgument &PA : TemplateArgs)
mangleTemplateArg(TD, PA, Parm);
}
break;
}
case TemplateArgument::Template: {
const NamedDecl *ND =
TA.getAsTemplate().getAsTemplateDecl()->getTemplatedDecl();
if (const auto *TD = dyn_cast<TagDecl>(ND)) {
mangleType(TD);
} else if (isa<TypeAliasDecl>(ND)) {
Out << "$$Y";
mangleName(ND);
} else {
llvm_unreachable("unexpected template template NamedDecl!");
}
break;
}
}
}
void MicrosoftCXXNameMangler::mangleObjCProtocol(const ObjCProtocolDecl *PD) {
llvm::SmallString<64> TemplateMangling;
llvm::raw_svector_ostream Stream(TemplateMangling);
MicrosoftCXXNameMangler Extra(Context, Stream);
Stream << "?$";
Extra.mangleSourceName("Protocol");
Extra.mangleArtificialTagType(TTK_Struct, PD->getName());
mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__ObjC"});
}
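// Illustrative sketch: a protocol qualifier P is treated as the synthetic
// template "struct __ObjC::Protocol<struct P>", so it mangles roughly as
// "U?$Protocol@UP@@@__ObjC@@".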
void MicrosoftCXXNameMangler::mangleObjCLifetime(const QualType Type,
Qualifiers Quals,
SourceRange Range) {
llvm::SmallString<64> TemplateMangling;
llvm::raw_svector_ostream Stream(TemplateMangling);
MicrosoftCXXNameMangler Extra(Context, Stream);
Stream << "?$";
switch (Quals.getObjCLifetime()) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
break;
case Qualifiers::OCL_Autoreleasing:
Extra.mangleSourceName("Autoreleasing");
break;
case Qualifiers::OCL_Strong:
Extra.mangleSourceName("Strong");
break;
case Qualifiers::OCL_Weak:
Extra.mangleSourceName("Weak");
break;
}
Extra.manglePointerCVQualifiers(Quals);
Extra.manglePointerExtQualifiers(Quals, Type);
Extra.mangleType(Type, Range);
mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__ObjC"});
}
void MicrosoftCXXNameMangler::mangleObjCKindOfType(const ObjCObjectType *T,
Qualifiers Quals,
SourceRange Range) {
llvm::SmallString<64> TemplateMangling;
llvm::raw_svector_ostream Stream(TemplateMangling);
MicrosoftCXXNameMangler Extra(Context, Stream);
Stream << "?$";
Extra.mangleSourceName("KindOf");
Extra.mangleType(QualType(T, 0)
.stripObjCKindOfType(getASTContext())
->getAs<ObjCObjectType>(),
Quals, Range);
mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__ObjC"});
}
void MicrosoftCXXNameMangler::mangleQualifiers(Qualifiers Quals,
bool IsMember) {
// <cvr-qualifiers> ::= [E] [F] [I] <base-cvr-qualifiers>
// 'E' means __ptr64 (32-bit only); 'F' means __unaligned (32/64-bit only);
// 'I' means __restrict (32/64-bit).
// Note that the MSVC __restrict keyword isn't the same as the C99 restrict
// keyword!
// <base-cvr-qualifiers> ::= A # near
// ::= B # near const
// ::= C # near volatile
// ::= D # near const volatile
// ::= E # far (16-bit)
// ::= F # far const (16-bit)
// ::= G # far volatile (16-bit)
// ::= H # far const volatile (16-bit)
// ::= I # huge (16-bit)
// ::= J # huge const (16-bit)
// ::= K # huge volatile (16-bit)
// ::= L # huge const volatile (16-bit)
// ::= M <basis> # based
// ::= N <basis> # based const
// ::= O <basis> # based volatile
// ::= P <basis> # based const volatile
// ::= Q # near member
// ::= R # near const member
// ::= S # near volatile member
// ::= T # near const volatile member
// ::= U # far member (16-bit)
// ::= V # far const member (16-bit)
// ::= W # far volatile member (16-bit)
// ::= X # far const volatile member (16-bit)
// ::= Y # huge member (16-bit)
// ::= Z # huge const member (16-bit)
// ::= 0 # huge volatile member (16-bit)
// ::= 1 # huge const volatile member (16-bit)
// ::= 2 <basis> # based member
// ::= 3 <basis> # based const member
// ::= 4 <basis> # based volatile member
// ::= 5 <basis> # based const volatile member
// ::= 6 # near function (pointers only)
// ::= 7 # far function (pointers only)
// ::= 8 # near method (pointers only)
// ::= 9 # far method (pointers only)
// ::= _A <basis> # based function (pointers only)
// ::= _B <basis> # based function (far?) (pointers only)
// ::= _C <basis> # based method (pointers only)
// ::= _D <basis> # based method (far?) (pointers only)
// ::= _E # block (Clang)
// <basis> ::= 0 # __based(void)
// ::= 1 # __based(segment)?
// ::= 2 <name> # __based(name)
// ::= 3 # ?
// ::= 4 # ?
// ::= 5 # not really based
bool HasConst = Quals.hasConst(),
HasVolatile = Quals.hasVolatile();
if (!IsMember) {
if (HasConst && HasVolatile) {
Out << 'D';
} else if (HasVolatile) {
Out << 'C';
} else if (HasConst) {
Out << 'B';
} else {
Out << 'A';
}
} else {
if (HasConst && HasVolatile) {
Out << 'T';
} else if (HasVolatile) {
Out << 'S';
} else if (HasConst) {
Out << 'R';
} else {
Out << 'Q';
}
}
// FIXME: For now, just drop all extension qualifiers on the floor.
}
void
MicrosoftCXXNameMangler::mangleRefQualifier(RefQualifierKind RefQualifier) {
// <ref-qualifier> ::= G # lvalue reference
// ::= H # rvalue-reference
switch (RefQualifier) {
case RQ_None:
break;
case RQ_LValue:
Out << 'G';
break;
case RQ_RValue:
Out << 'H';
break;
}
}
void MicrosoftCXXNameMangler::manglePointerExtQualifiers(Qualifiers Quals,
QualType PointeeType) {
if (PointersAre64Bit &&
(PointeeType.isNull() || !PointeeType->isFunctionType()))
Out << 'E';
if (Quals.hasRestrict())
Out << 'I';
if (Quals.hasUnaligned() ||
(!PointeeType.isNull() && PointeeType.getLocalQualifiers().hasUnaligned()))
Out << 'F';
}
void MicrosoftCXXNameMangler::manglePointerCVQualifiers(Qualifiers Quals) {
// <pointer-cv-qualifiers> ::= P # no qualifiers
// ::= Q # const
// ::= R # volatile
// ::= S # const volatile
bool HasConst = Quals.hasConst(),
HasVolatile = Quals.hasVolatile();
if (HasConst && HasVolatile) {
Out << 'S';
} else if (HasVolatile) {
Out << 'R';
} else if (HasConst) {
Out << 'Q';
} else {
Out << 'P';
}
}
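// Illustrative examples (32-bit): "char *" yields 'P' and "char *const"
// yields 'Q' (these qualifiers belong to the pointer itself); a const
// pointee is mangled separately by mangleQualifiers, so a "const char *"
// parameter comes out as "PBD" (or "PEBD" with the 64-bit 'E').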
void MicrosoftCXXNameMangler::mangleArgumentType(QualType T,
SourceRange Range) {
// MSVC will backreference two canonically equivalent types that have slightly
// different manglings when mangled alone.
// Decayed types do not match up with non-decayed versions of the same type.
//
// e.g.
// void (*x)(void) will not form a backreference with void x(void)
void *TypePtr;
if (const auto *DT = T->getAs<DecayedType>()) {
QualType OriginalType = DT->getOriginalType();
// All decayed ArrayTypes should be treated identically, as if they were
// a decayed IncompleteArrayType.
if (const auto *AT = getASTContext().getAsArrayType(OriginalType))
OriginalType = getASTContext().getIncompleteArrayType(
AT->getElementType(), AT->getSizeModifier(),
AT->getIndexTypeCVRQualifiers());
TypePtr = OriginalType.getCanonicalType().getAsOpaquePtr();
// If the original parameter was textually written as an array,
// instead treat the decayed parameter like it's const.
//
// e.g.
// int [] -> int * const
if (OriginalType->isArrayType())
T = T.withConst();
} else {
TypePtr = T.getCanonicalType().getAsOpaquePtr();
}
ArgBackRefMap::iterator Found = TypeBackReferences.find(TypePtr);
if (Found == TypeBackReferences.end()) {
size_t OutSizeBefore = Out.tell();
mangleType(T, Range, QMM_Drop);
// See if it's worth creating a back reference.
// Only types longer than 1 character are considered
// and only 10 back reference slots are available:
bool LongerThanOneChar = (Out.tell() - OutSizeBefore > 1);
if (LongerThanOneChar && TypeBackReferences.size() < 10) {
size_t Size = TypeBackReferences.size();
TypeBackReferences[TypePtr] = Size;
}
} else {
Out << Found->second;
}
}
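// Illustrative example: in "void f(const char *, const char *)" the first
// parameter mangles as "PBD" and takes back reference slot 0, so the
// second is emitted as just "0" ("?f@@YAXPBD0@Z" on 32-bit x86); a plain
// "int" mangles as the single character 'H' and is never registered.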
void MicrosoftCXXNameMangler::manglePassObjectSizeArg(
const PassObjectSizeAttr *POSA) {
int Type = POSA->getType();
auto Iter = PassObjectSizeArgs.insert(Type).first;
auto *TypePtr = (const void *)&*Iter;
ArgBackRefMap::iterator Found = TypeBackReferences.find(TypePtr);
if (Found == TypeBackReferences.end()) {
mangleArtificialTagType(TTK_Enum, "__pass_object_size" + llvm::utostr(Type),
{"__clang"});
if (TypeBackReferences.size() < 10) {
size_t Size = TypeBackReferences.size();
TypeBackReferences[TypePtr] = Size;
}
} else {
Out << Found->second;
}
}
void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
Qualifiers Quals,
SourceRange Range) {
// Address space is mangled as an unqualified templated type in the __clang
// namespace. The demangled version of this is:
// In the case of a language specific address space:
// __clang::struct _AS[language_addr_space]<Type>
// where:
// <language_addr_space> ::= <OpenCL-addrspace> | <CUDA-addrspace>
// <OpenCL-addrspace> ::= "CL" [ "global" | "local" | "constant" |
// "private"| "generic" ]
// <CUDA-addrspace> ::= "CU" [ "device" | "constant" | "shared" ]
// Note that the above were chosen to match the Itanium mangling for this.
//
// In the case of a non-language specific address space:
// __clang::struct _AS<TargetAS, Type>
assert(Quals.hasAddressSpace() && "Not valid without address space");
llvm::SmallString<32> ASMangling;
llvm::raw_svector_ostream Stream(ASMangling);
MicrosoftCXXNameMangler Extra(Context, Stream);
Stream << "?$";
LangAS AS = Quals.getAddressSpace();
if (Context.getASTContext().addressSpaceMapManglingFor(AS)) {
unsigned TargetAS = Context.getASTContext().getTargetAddressSpace(AS);
Extra.mangleSourceName("_AS");
Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(TargetAS),
/*IsBoolean*/ false);
} else {
switch (AS) {
default:
llvm_unreachable("Not a language specific address space");
case LangAS::opencl_global:
Extra.mangleSourceName("_ASCLglobal");
break;
case LangAS::opencl_local:
Extra.mangleSourceName("_ASCLlocal");
break;
case LangAS::opencl_constant:
Extra.mangleSourceName("_ASCLconstant");
break;
case LangAS::opencl_private:
Extra.mangleSourceName("_ASCLprivate");
break;
case LangAS::opencl_generic:
Extra.mangleSourceName("_ASCLgeneric");
break;
case LangAS::cuda_device:
Extra.mangleSourceName("_ASCUdevice");
break;
case LangAS::cuda_constant:
Extra.mangleSourceName("_ASCUconstant");
break;
case LangAS::cuda_shared:
Extra.mangleSourceName("_ASCUshared");
break;
}
}
Extra.mangleType(T, Range, QMM_Escape);
mangleQualifiers(Qualifiers(), false);
mangleArtificialTagType(TTK_Struct, ASMangling, {"__clang"});
}
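// Illustrative example: an OpenCL "__global int *" parameter mangles its
// pointee as the synthetic template "__clang::_ASCLglobal<int>" instead
// of a bare 'H', which keeps differently address-space-qualified
// overloads from colliding.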
void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range,
QualifierMangleMode QMM) {
// Don't use the canonical types. MSVC includes things like 'const' on
// pointer arguments to function pointers that canonicalization strips away.
T = T.getDesugaredType(getASTContext());
Qualifiers Quals = T.getLocalQualifiers();
if (const ArrayType *AT = getASTContext().getAsArrayType(T)) {
// If there were any Quals, getAsArrayType() pushed them onto the array
// element type.
if (QMM == QMM_Mangle)
Out << 'A';
else if (QMM == QMM_Escape || QMM == QMM_Result)
Out << "$$B";
mangleArrayType(AT);
return;
}
bool IsPointer = T->isAnyPointerType() || T->isMemberPointerType() ||
T->isReferenceType() || T->isBlockPointerType();
switch (QMM) {
case QMM_Drop:
if (Quals.hasObjCLifetime())
Quals = Quals.withoutObjCLifetime();
break;
case QMM_Mangle:
if (const FunctionType *FT = dyn_cast<FunctionType>(T)) {
Out << '6';
mangleFunctionType(FT);
return;
}
mangleQualifiers(Quals, false);
break;
case QMM_Escape:
if (!IsPointer && Quals) {
Out << "$$C";
mangleQualifiers(Quals, false);
}
break;
case QMM_Result:
// Presence of __unaligned qualifier shouldn't affect mangling here.
Quals.removeUnaligned();
if (Quals.hasObjCLifetime())
Quals = Quals.withoutObjCLifetime();
if ((!IsPointer && Quals) || isa<TagType>(T) || isArtificialTagType(T)) {
Out << '?';
mangleQualifiers(Quals, false);
}
break;
}
const Type *ty = T.getTypePtr();
switch (ty->getTypeClass()) {
#define ABSTRACT_TYPE(CLASS, PARENT)
#define NON_CANONICAL_TYPE(CLASS, PARENT) \
case Type::CLASS: \
llvm_unreachable("can't mangle non-canonical type " #CLASS "Type"); \
return;
#define TYPE(CLASS, PARENT) \
case Type::CLASS: \
mangleType(cast<CLASS##Type>(ty), Quals, Range); \
break;
#include "clang/AST/TypeNodes.def"
#undef ABSTRACT_TYPE
#undef NON_CANONICAL_TYPE
#undef TYPE
}
}
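// For reference, one instance of the TYPE() expansion above looks like:
//   case Type::Builtin:
//     mangleType(cast<BuiltinType>(ty), Quals, Range);
//     break;
// so every canonical type class is dispatched to a matching overload
// below, while non-canonical types are rejected as unreachable.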
void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
SourceRange Range) {
// <type> ::= <builtin-type>
// <builtin-type> ::= X # void
// ::= C # signed char
// ::= D # char
// ::= E # unsigned char
// ::= F # short
// ::= G # unsigned short (or wchar_t if it's not a builtin)
// ::= H # int
// ::= I # unsigned int
// ::= J # long
// ::= K # unsigned long
// L # <none>
// ::= M # float
// ::= N # double
// ::= O # long double (__float80 is mangled differently)
// ::= _J # long long, __int64
// ::= _K # unsigned long long, __int64
// ::= _L # __int128
// ::= _M # unsigned __int128
// ::= _N # bool
// _O # <array in parameter>
- // ::= _T # __float80 (Intel)
+ // ::= _Q # char8_t
// ::= _S # char16_t
+ // ::= _T # __float80 (Intel)
// ::= _U # char32_t
// ::= _W # wchar_t
// ::= _Z # __float80 (Digital Mars)
switch (T->getKind()) {
case BuiltinType::Void:
Out << 'X';
break;
case BuiltinType::SChar:
Out << 'C';
break;
case BuiltinType::Char_U:
case BuiltinType::Char_S:
Out << 'D';
break;
case BuiltinType::UChar:
Out << 'E';
break;
case BuiltinType::Short:
Out << 'F';
break;
case BuiltinType::UShort:
Out << 'G';
break;
case BuiltinType::Int:
Out << 'H';
break;
case BuiltinType::UInt:
Out << 'I';
break;
case BuiltinType::Long:
Out << 'J';
break;
case BuiltinType::ULong:
Out << 'K';
break;
case BuiltinType::Float:
Out << 'M';
break;
case BuiltinType::Double:
Out << 'N';
break;
// TODO: Determine size and mangle accordingly
case BuiltinType::LongDouble:
Out << 'O';
break;
case BuiltinType::LongLong:
Out << "_J";
break;
case BuiltinType::ULongLong:
Out << "_K";
break;
case BuiltinType::Int128:
Out << "_L";
break;
case BuiltinType::UInt128:
Out << "_M";
break;
case BuiltinType::Bool:
Out << "_N";
break;
+ case BuiltinType::Char8:
+ Out << "_Q";
+ break;
case BuiltinType::Char16:
Out << "_S";
break;
case BuiltinType::Char32:
Out << "_U";
break;
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
Out << "_W";
break;
#define BUILTIN_TYPE(Id, SingletonId)
#define PLACEHOLDER_TYPE(Id, SingletonId) \
case BuiltinType::Id:
#include "clang/AST/BuiltinTypes.def"
case BuiltinType::Dependent:
llvm_unreachable("placeholder types shouldn't get to name mangling");
case BuiltinType::ObjCId:
mangleArtificialTagType(TTK_Struct, "objc_object");
break;
case BuiltinType::ObjCClass:
mangleArtificialTagType(TTK_Struct, "objc_class");
break;
case BuiltinType::ObjCSel:
mangleArtificialTagType(TTK_Struct, "objc_selector");
break;
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
Out << "PAUocl_" #ImgType "_" #Suffix "@@"; \
break;
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
Out << "PA";
mangleArtificialTagType(TTK_Struct, "ocl_sampler");
break;
case BuiltinType::OCLEvent:
Out << "PA";
mangleArtificialTagType(TTK_Struct, "ocl_event");
break;
case BuiltinType::OCLClkEvent:
Out << "PA";
mangleArtificialTagType(TTK_Struct, "ocl_clkevent");
break;
case BuiltinType::OCLQueue:
Out << "PA";
mangleArtificialTagType(TTK_Struct, "ocl_queue");
break;
case BuiltinType::OCLReserveID:
Out << "PA";
mangleArtificialTagType(TTK_Struct, "ocl_reserveid");
break;
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id: \
mangleArtificialTagType(TTK_Struct, "ocl_" #ExtType); \
break;
#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::NullPtr:
Out << "$$T";
break;
case BuiltinType::Float16:
mangleArtificialTagType(TTK_Struct, "_Float16", {"__clang"});
break;
case BuiltinType::Half:
mangleArtificialTagType(TTK_Struct, "_Half", {"__clang"});
break;
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
case BuiltinType::LongAccum:
case BuiltinType::UShortAccum:
case BuiltinType::UAccum:
case BuiltinType::ULongAccum:
case BuiltinType::ShortFract:
case BuiltinType::Fract:
case BuiltinType::LongFract:
case BuiltinType::UShortFract:
case BuiltinType::UFract:
case BuiltinType::ULongFract:
case BuiltinType::SatShortAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatLongAccum:
case BuiltinType::SatUShortAccum:
case BuiltinType::SatUAccum:
case BuiltinType::SatULongAccum:
case BuiltinType::SatShortFract:
case BuiltinType::SatFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatUShortFract:
case BuiltinType::SatUFract:
case BuiltinType::SatULongFract:
- case BuiltinType::Char8:
case BuiltinType::Float128: {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error, "cannot mangle this built-in %0 type yet");
Diags.Report(Range.getBegin(), DiagID)
<< T->getName(Context.getASTContext().getPrintingPolicy()) << Range;
break;
}
}
}
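// Illustrative examples of the table above: "bool" mangles as "_N", and
// with the "_Q" addition in this change "void f(char8_t)" should mangle
// as "?f@@YAX_Q@Z" on 32-bit x86, matching MSVC's char8_t mangling.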
// <type> ::= <function-type>
void MicrosoftCXXNameMangler::mangleType(const FunctionProtoType *T, Qualifiers,
SourceRange) {
// Structors only appear in decls, so at this point we know it's not a
// structor type.
// FIXME: This may not be lambda-friendly.
if (T->getTypeQuals() || T->getRefQualifier() != RQ_None) {
Out << "$$A8@@";
mangleFunctionType(T, /*D=*/nullptr, /*ForceThisQuals=*/true);
} else {
Out << "$$A6";
mangleFunctionType(T);
}
}
void MicrosoftCXXNameMangler::mangleType(const FunctionNoProtoType *T,
Qualifiers, SourceRange) {
Out << "$$A6";
mangleFunctionType(T);
}
void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
const FunctionDecl *D,
bool ForceThisQuals,
bool MangleExceptionSpec) {
// <function-type> ::= <this-cvr-qualifiers> <calling-convention>
// <return-type> <argument-list> <throw-spec>
const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(T);
SourceRange Range;
if (D) Range = D->getSourceRange();
bool IsInLambda = false;
bool IsStructor = false, HasThisQuals = ForceThisQuals, IsCtorClosure = false;
CallingConv CC = T->getCallConv();
if (const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(D)) {
if (MD->getParent()->isLambda())
IsInLambda = true;
if (MD->isInstance())
HasThisQuals = true;
if (isa<CXXDestructorDecl>(MD)) {
IsStructor = true;
} else if (isa<CXXConstructorDecl>(MD)) {
IsStructor = true;
IsCtorClosure = (StructorType == Ctor_CopyingClosure ||
StructorType == Ctor_DefaultClosure) &&
isStructorDecl(MD);
if (IsCtorClosure)
CC = getASTContext().getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
}
}
// If this is a C++ instance method, mangle the CVR qualifiers for the
// this pointer.
if (HasThisQuals) {
Qualifiers Quals = Proto->getTypeQuals();
manglePointerExtQualifiers(Quals, /*PointeeType=*/QualType());
mangleRefQualifier(Proto->getRefQualifier());
mangleQualifiers(Quals, /*IsMember=*/false);
}
mangleCallingConvention(CC);
// <return-type> ::= <type>
// ::= @ # structors (they have no declared return type)
if (IsStructor) {
if (isa<CXXDestructorDecl>(D) && isStructorDecl(D)) {
// The scalar deleting destructor takes an extra int argument which is not
// reflected in the AST.
if (StructorType == Dtor_Deleting) {
Out << (PointersAre64Bit ? "PEAXI@Z" : "PAXI@Z");
return;
}
// The vbase destructor returns void which is not reflected in the AST.
if (StructorType == Dtor_Complete) {
Out << "XXZ";
return;
}
}
if (IsCtorClosure) {
// Default constructor closure and copy constructor closure both return
// void.
Out << 'X';
if (StructorType == Ctor_DefaultClosure) {
// Default constructor closure always has no arguments.
Out << 'X';
} else if (StructorType == Ctor_CopyingClosure) {
// Copy constructor closure always takes an unqualified reference.
mangleArgumentType(getASTContext().getLValueReferenceType(
Proto->getParamType(0)
->getAs<LValueReferenceType>()
->getPointeeType(),
/*SpelledAsLValue=*/true),
Range);
Out << '@';
} else {
llvm_unreachable("unexpected constructor closure!");
}
Out << 'Z';
return;
}
Out << '@';
} else {
QualType ResultType = T->getReturnType();
if (const auto *AT =
dyn_cast_or_null<AutoType>(ResultType->getContainedAutoType())) {
Out << '?';
mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false);
Out << '?';
assert(AT->getKeyword() != AutoTypeKeyword::GNUAutoType &&
"shouldn't need to mangle __auto_type!");
mangleSourceName(AT->isDecltypeAuto() ? "<decltype-auto>" : "<auto>");
Out << '@';
} else if (IsInLambda) {
Out << '@';
} else {
if (ResultType->isVoidType())
ResultType = ResultType.getUnqualifiedType();
mangleType(ResultType, Range, QMM_Result);
}
}
// <argument-list> ::= X # void
// ::= <type>+ @
// ::= <type>* Z # varargs
if (!Proto) {
// Function types without prototypes can arise when mangling a function type
// within an overloadable function in C. We mangle these as the absence of
// any parameter types (not even an empty parameter list).
Out << '@';
} else if (Proto->getNumParams() == 0 && !Proto->isVariadic()) {
Out << 'X';
} else {
// Happens for function pointer type arguments for example.
for (unsigned I = 0, E = Proto->getNumParams(); I != E; ++I) {
mangleArgumentType(Proto->getParamType(I), Range);
// Mangle each pass_object_size parameter as if it's a parameter of enum
// type passed directly after the parameter with the pass_object_size
// attribute. The aforementioned enum's name is __pass_object_size, and we
// pretend it resides in a top-level namespace called __clang.
//
// FIXME: Is there a defined extension notation for the MS ABI, or is it
// necessary to just cross our fingers and hope this type+namespace
// combination doesn't conflict with anything?
if (D)
if (const auto *P = D->getParamDecl(I)->getAttr<PassObjectSizeAttr>())
manglePassObjectSizeArg(P);
}
// <builtin-type> ::= Z # ellipsis
if (Proto->isVariadic())
Out << 'Z';
else
Out << '@';
}
if (MangleExceptionSpec && getASTContext().getLangOpts().CPlusPlus17 &&
getASTContext().getLangOpts().isCompatibleWithMSVC(
LangOptions::MSVC2017_5))
mangleThrowSpecification(Proto);
else
Out << 'Z';
}
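// Worked example (illustrative, 32-bit): for "struct C { void f() const; };"
// this emits 'B' for the const 'this', 'E' for __thiscall, 'X' for the
// void return, 'X' for the empty parameter list, and 'Z' for the default
// throw spec; with the function class 'Q' from below, the full mangling
// is "?f@C@@QBEXXZ".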
void MicrosoftCXXNameMangler::mangleFunctionClass(const FunctionDecl *FD) {
// <function-class> ::= <member-function> E? # E designates a 64-bit 'this'
// # pointer. In 64-bit mode *all*
// # 'this' pointers are 64-bit.
// ::= <global-function>
// <member-function> ::= A # private: near
// ::= B # private: far
// ::= C # private: static near
// ::= D # private: static far
// ::= E # private: virtual near
// ::= F # private: virtual far
// ::= I # protected: near
// ::= J # protected: far
// ::= K # protected: static near
// ::= L # protected: static far
// ::= M # protected: virtual near
// ::= N # protected: virtual far
// ::= Q # public: near
// ::= R # public: far
// ::= S # public: static near
// ::= T # public: static far
// ::= U # public: virtual near
// ::= V # public: virtual far
// <global-function> ::= Y # global near
// ::= Z # global far
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD)) {
bool IsVirtual = MD->isVirtual();
// When mangling vbase destructor variants, ignore whether or not the
// underlying destructor was defined to be virtual.
if (isa<CXXDestructorDecl>(MD) && isStructorDecl(MD) &&
StructorType == Dtor_Complete) {
IsVirtual = false;
}
switch (MD->getAccess()) {
case AS_none:
llvm_unreachable("Unsupported access specifier");
case AS_private:
if (MD->isStatic())
Out << 'C';
else if (IsVirtual)
Out << 'E';
else
Out << 'A';
break;
case AS_protected:
if (MD->isStatic())
Out << 'K';
else if (IsVirtual)
Out << 'M';
else
Out << 'I';
break;
case AS_public:
if (MD->isStatic())
Out << 'S';
else if (IsVirtual)
Out << 'U';
else
Out << 'Q';
}
} else {
Out << 'Y';
}
}
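// Illustrative examples: a public static member function such as
// "struct C { static void f(); };" gets 'S' ("?f@C@@SAXXZ"), while a
// free function gets 'Y' ("?f@@YAXXZ").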
void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) {
// <calling-convention> ::= A # __cdecl
// ::= B # __export __cdecl
// ::= C # __pascal
// ::= D # __export __pascal
// ::= E # __thiscall
// ::= F # __export __thiscall
// ::= G # __stdcall
// ::= H # __export __stdcall
// ::= I # __fastcall
// ::= J # __export __fastcall
// ::= Q # __vectorcall
// ::= w # __regcall
// The 'export' calling conventions are from a bygone era
// (*cough*Win16*cough*) when functions were declared for export with
// that keyword. (It didn't actually export them, it just made them so
// that they could be in a DLL and somebody from another module could call
// them.)
switch (CC) {
default:
llvm_unreachable("Unsupported CC for mangling");
case CC_Win64:
case CC_X86_64SysV:
case CC_C: Out << 'A'; break;
case CC_X86Pascal: Out << 'C'; break;
case CC_X86ThisCall: Out << 'E'; break;
case CC_X86StdCall: Out << 'G'; break;
case CC_X86FastCall: Out << 'I'; break;
case CC_X86VectorCall: Out << 'Q'; break;
case CC_Swift: Out << 'S'; break;
case CC_PreserveMost: Out << 'U'; break;
case CC_X86RegCall: Out << 'w'; break;
}
}
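// Illustrative examples: "void __stdcall f();" mangles as "?f@@YGXXZ"
// and the default __cdecl as "?f@@YAXXZ". Note that CC_Win64 and
// CC_X86_64SysV collapse to 'A' as well, since x64 code has a single
// calling convention as far as the mangling is concerned.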
void MicrosoftCXXNameMangler::mangleCallingConvention(const FunctionType *T) {
mangleCallingConvention(T->getCallConv());
}
void MicrosoftCXXNameMangler::mangleThrowSpecification(
const FunctionProtoType *FT) {
// <throw-spec> ::= Z # (default)
// ::= _E # noexcept
if (FT->canThrow())
Out << 'Z';
else
Out << "_E";
}
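// Illustrative example: with C++17 and MSVC 2017 update 5 compatibility
// (see mangleFunctionType above), "void f() noexcept" ends in "_E"
// instead of 'Z', giving "?f@@YAXX_E" rather than "?f@@YAXXZ".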
void MicrosoftCXXNameMangler::mangleType(const UnresolvedUsingType *T,
Qualifiers, SourceRange Range) {
// Probably should be mangled as a template instantiation; need to see what
// VC does first.
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this unresolved dependent type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
// <type> ::= <union-type> | <struct-type> | <class-type> | <enum-type>
// <union-type> ::= T <name>
// <struct-type> ::= U <name>
// <class-type> ::= V <name>
// <enum-type> ::= W4 <name>
void MicrosoftCXXNameMangler::mangleTagTypeKind(TagTypeKind TTK) {
switch (TTK) {
case TTK_Union:
Out << 'T';
break;
case TTK_Struct:
case TTK_Interface:
Out << 'U';
break;
case TTK_Class:
Out << 'V';
break;
case TTK_Enum:
Out << "W4";
break;
}
}
void MicrosoftCXXNameMangler::mangleType(const EnumType *T, Qualifiers,
SourceRange) {
mangleType(cast<TagType>(T)->getDecl());
}
void MicrosoftCXXNameMangler::mangleType(const RecordType *T, Qualifiers,
SourceRange) {
mangleType(cast<TagType>(T)->getDecl());
}
void MicrosoftCXXNameMangler::mangleType(const TagDecl *TD) {
mangleTagTypeKind(TD->getTagKind());
mangleName(TD);
}
// If you add a call to this, consider updating isArtificialTagType() too.
void MicrosoftCXXNameMangler::mangleArtificialTagType(
TagTypeKind TK, StringRef UnqualifiedName,
ArrayRef<StringRef> NestedNames) {
// <name> ::= <unscoped-name> {[<named-scope>]+ | [<nested-name>]}? @
mangleTagTypeKind(TK);
// Always start with the unqualified name.
mangleSourceName(UnqualifiedName);
for (auto I = NestedNames.rbegin(), E = NestedNames.rend(); I != E; ++I)
mangleSourceName(*I);
// Terminate the whole name with an '@'.
Out << '@';
}
// <type> ::= <array-type>
// <array-type> ::= <pointer-cvr-qualifiers> <cvr-qualifiers>
// [Y <dimension-count> <dimension>+]
// <element-type> # as global, E is never required
// It's supposed to be the other way around, but for some strange reason, it
// isn't. Today this behavior is retained for the sole purpose of backwards
// compatibility.
void MicrosoftCXXNameMangler::mangleDecayedArrayType(const ArrayType *T) {
// This isn't a recursive mangling, so now we have to do it all in this
// one call.
manglePointerCVQualifiers(T->getElementType().getQualifiers());
mangleType(T->getElementType(), SourceRange());
}
void MicrosoftCXXNameMangler::mangleType(const ConstantArrayType *T, Qualifiers,
SourceRange) {
llvm_unreachable("Should have been special cased");
}
void MicrosoftCXXNameMangler::mangleType(const VariableArrayType *T, Qualifiers,
SourceRange) {
llvm_unreachable("Should have been special cased");
}
void MicrosoftCXXNameMangler::mangleType(const DependentSizedArrayType *T,
Qualifiers, SourceRange) {
llvm_unreachable("Should have been special cased");
}
void MicrosoftCXXNameMangler::mangleType(const IncompleteArrayType *T,
Qualifiers, SourceRange) {
llvm_unreachable("Should have been special cased");
}
void MicrosoftCXXNameMangler::mangleArrayType(const ArrayType *T) {
QualType ElementTy(T, 0);
SmallVector<llvm::APInt, 3> Dimensions;
for (;;) {
if (ElementTy->isConstantArrayType()) {
const ConstantArrayType *CAT =
getASTContext().getAsConstantArrayType(ElementTy);
Dimensions.push_back(CAT->getSize());
ElementTy = CAT->getElementType();
} else if (ElementTy->isIncompleteArrayType()) {
const IncompleteArrayType *IAT =
getASTContext().getAsIncompleteArrayType(ElementTy);
Dimensions.push_back(llvm::APInt(32, 0));
ElementTy = IAT->getElementType();
} else if (ElementTy->isVariableArrayType()) {
const VariableArrayType *VAT =
getASTContext().getAsVariableArrayType(ElementTy);
Dimensions.push_back(llvm::APInt(32, 0));
ElementTy = VAT->getElementType();
} else if (ElementTy->isDependentSizedArrayType()) {
// The dependent expression has to be folded into a constant (TODO).
const DependentSizedArrayType *DSAT =
getASTContext().getAsDependentSizedArrayType(ElementTy);
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this dependent-length array yet");
Diags.Report(DSAT->getSizeExpr()->getExprLoc(), DiagID)
<< DSAT->getBracketsRange();
return;
} else {
break;
}
}
Out << 'Y';
// <dimension-count> ::= <number> # number of extra dimensions
mangleNumber(Dimensions.size());
for (const llvm::APInt &Dimension : Dimensions)
mangleNumber(Dimension.getLimitedValue());
mangleType(ElementTy, SourceRange(), QMM_Escape);
}
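// Illustrative example: a template argument of type "int[5]" is escaped
// as "$$B" (see mangleType above) followed by "Y04H": 'Y', a dimension
// count of 1 (encoded '0'), an extent of 5 (encoded '4'), and the 'H'
// element type.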
// <type> ::= <pointer-to-member-type>
// <pointer-to-member-type> ::= <pointer-cvr-qualifiers> <cvr-qualifiers>
// <class name> <type>
void MicrosoftCXXNameMangler::mangleType(const MemberPointerType *T,
Qualifiers Quals, SourceRange Range) {
QualType PointeeType = T->getPointeeType();
manglePointerCVQualifiers(Quals);
manglePointerExtQualifiers(Quals, PointeeType);
if (const FunctionProtoType *FPT = PointeeType->getAs<FunctionProtoType>()) {
Out << '8';
mangleName(T->getClass()->castAs<RecordType>()->getDecl());
mangleFunctionType(FPT, nullptr, true);
} else {
mangleQualifiers(PointeeType.getQualifiers(), true);
mangleName(T->getClass()->castAs<RecordType>()->getDecl());
mangleType(PointeeType, Range, QMM_Drop);
}
}
void MicrosoftCXXNameMangler::mangleType(const TemplateTypeParmType *T,
Qualifiers, SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this template type parameter type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const SubstTemplateTypeParmPackType *T,
Qualifiers, SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this substituted parameter pack yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
// <type> ::= <pointer-type>
// <pointer-type> ::= E? <pointer-cvr-qualifiers> <cvr-qualifiers> <type>
// # the E is required for 64-bit non-static pointers
void MicrosoftCXXNameMangler::mangleType(const PointerType *T, Qualifiers Quals,
SourceRange Range) {
QualType PointeeType = T->getPointeeType();
manglePointerCVQualifiers(Quals);
manglePointerExtQualifiers(Quals, PointeeType);
if (PointeeType.getQualifiers().hasAddressSpace())
mangleAddressSpaceType(PointeeType, PointeeType.getQualifiers(), Range);
else
mangleType(PointeeType, Range);
}
void MicrosoftCXXNameMangler::mangleType(const ObjCObjectPointerType *T,
Qualifiers Quals, SourceRange Range) {
QualType PointeeType = T->getPointeeType();
switch (Quals.getObjCLifetime()) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
break;
case Qualifiers::OCL_Autoreleasing:
case Qualifiers::OCL_Strong:
case Qualifiers::OCL_Weak:
return mangleObjCLifetime(PointeeType, Quals, Range);
}
manglePointerCVQualifiers(Quals);
manglePointerExtQualifiers(Quals, PointeeType);
mangleType(PointeeType, Range);
}
// <type> ::= <reference-type>
// <reference-type> ::= A E? <cvr-qualifiers> <type>
// # the E is required for 64-bit non-static lvalue references
void MicrosoftCXXNameMangler::mangleType(const LValueReferenceType *T,
Qualifiers Quals, SourceRange Range) {
QualType PointeeType = T->getPointeeType();
assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!");
Out << 'A';
manglePointerExtQualifiers(Quals, PointeeType);
mangleType(PointeeType, Range);
}
// <type> ::= <r-value-reference-type>
// <r-value-reference-type> ::= $$Q E? <cvr-qualifiers> <type>
// # the E is required for 64-bit non-static rvalue references
void MicrosoftCXXNameMangler::mangleType(const RValueReferenceType *T,
Qualifiers Quals, SourceRange Range) {
QualType PointeeType = T->getPointeeType();
assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!");
Out << "$$Q";
manglePointerExtQualifiers(Quals, PointeeType);
mangleType(PointeeType, Range);
}
void MicrosoftCXXNameMangler::mangleType(const ComplexType *T, Qualifiers,
SourceRange Range) {
QualType ElementType = T->getElementType();
llvm::SmallString<64> TemplateMangling;
llvm::raw_svector_ostream Stream(TemplateMangling);
MicrosoftCXXNameMangler Extra(Context, Stream);
Stream << "?$";
Extra.mangleSourceName("_Complex");
Extra.mangleType(ElementType, Range, QMM_Escape);
mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__clang"});
}
// Returns true for types that mangleArtificialTagType() gets called for with
// TTK_Union, TTK_Struct, TTK_Class and where compatibility with MSVC's
// mangling matters.
// (It doesn't matter for Objective-C types and the like that cl.exe doesn't
// support.)
bool MicrosoftCXXNameMangler::isArtificialTagType(QualType T) const {
const Type *ty = T.getTypePtr();
switch (ty->getTypeClass()) {
default:
return false;
case Type::Vector: {
// For ABI compatibility only __m64, __m128(id), and __m256(id) matter,
// but since mangleType(VectorType*) always calls mangleArtificialTagType()
// just always return true (the other vector types are clang-only).
return true;
}
}
}
void MicrosoftCXXNameMangler::mangleType(const VectorType *T, Qualifiers Quals,
SourceRange Range) {
const BuiltinType *ET = T->getElementType()->getAs<BuiltinType>();
assert(ET && "vectors with non-builtin elements are unsupported");
uint64_t Width = getASTContext().getTypeSize(T);
// Pattern match exactly the typedefs in our intrinsic headers. Anything that
// doesn't match the Intel types uses a custom mangling below.
size_t OutSizeBefore = Out.tell();
if (!isa<ExtVectorType>(T)) {
llvm::Triple::ArchType AT =
getASTContext().getTargetInfo().getTriple().getArch();
if (AT == llvm::Triple::x86 || AT == llvm::Triple::x86_64) {
if (Width == 64 && ET->getKind() == BuiltinType::LongLong) {
mangleArtificialTagType(TTK_Union, "__m64");
} else if (Width >= 128) {
if (ET->getKind() == BuiltinType::Float)
mangleArtificialTagType(TTK_Union, "__m" + llvm::utostr(Width));
else if (ET->getKind() == BuiltinType::LongLong)
mangleArtificialTagType(TTK_Union, "__m" + llvm::utostr(Width) + 'i');
else if (ET->getKind() == BuiltinType::Double)
mangleArtificialTagType(TTK_Struct, "__m" + llvm::utostr(Width) + 'd');
}
}
}
bool IsBuiltin = Out.tell() != OutSizeBefore;
if (!IsBuiltin) {
// The MS ABI doesn't have a special mangling for vector types, so we define
// our own mangling to handle uses of __vector_size__ on user-specified
// types, and for extensions like __v4sf.
llvm::SmallString<64> TemplateMangling;
llvm::raw_svector_ostream Stream(TemplateMangling);
MicrosoftCXXNameMangler Extra(Context, Stream);
Stream << "?$";
Extra.mangleSourceName("__vector");
Extra.mangleType(QualType(ET, 0), Range, QMM_Escape);
Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumElements()),
/*IsBoolean=*/false);
mangleArtificialTagType(TTK_Union, TemplateMangling, {"__clang"});
}
}
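// Illustrative examples: on x86 a vector of four floats (128 bits)
// matches the intrinsic typedefs and mangles as the union "__m128",
// while a vector of two ints (64 bits, element kind Int rather than
// LongLong) falls through to the synthetic "__clang::__vector<int, 2>"
// mangling.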
void MicrosoftCXXNameMangler::mangleType(const ExtVectorType *T,
Qualifiers Quals, SourceRange Range) {
mangleType(static_cast<const VectorType *>(T), Quals, Range);
}
void MicrosoftCXXNameMangler::mangleType(const DependentVectorType *T,
Qualifiers, SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error,
"cannot mangle this dependent-sized vector type yet");
Diags.Report(Range.getBegin(), DiagID) << Range;
}
void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T,
Qualifiers, SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this dependent-sized extended vector type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const DependentAddressSpaceType *T,
Qualifiers, SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error,
"cannot mangle this dependent address space type yet");
Diags.Report(Range.getBegin(), DiagID) << Range;
}
void MicrosoftCXXNameMangler::mangleType(const ObjCInterfaceType *T, Qualifiers,
SourceRange) {
// ObjC interfaces have structs underlying them.
mangleTagTypeKind(TTK_Struct);
mangleName(T->getDecl());
}
void MicrosoftCXXNameMangler::mangleType(const ObjCObjectType *T,
Qualifiers Quals, SourceRange Range) {
if (T->isKindOfType())
return mangleObjCKindOfType(T, Quals, Range);
if (T->qual_empty() && !T->isSpecialized())
return mangleType(T->getBaseType(), Range, QMM_Drop);
ArgBackRefMap OuterArgsContext;
BackRefVec OuterTemplateContext;
TypeBackReferences.swap(OuterArgsContext);
NameBackReferences.swap(OuterTemplateContext);
mangleTagTypeKind(TTK_Struct);
Out << "?$";
if (T->isObjCId())
mangleSourceName("objc_object");
else if (T->isObjCClass())
mangleSourceName("objc_class");
else
mangleSourceName(T->getInterface()->getName());
for (const auto &Q : T->quals())
mangleObjCProtocol(Q);
if (T->isSpecialized())
for (const auto &TA : T->getTypeArgs())
mangleType(TA, Range, QMM_Drop);
Out << '@';
Out << '@';
TypeBackReferences.swap(OuterArgsContext);
NameBackReferences.swap(OuterTemplateContext);
}
void MicrosoftCXXNameMangler::mangleType(const BlockPointerType *T,
Qualifiers Quals, SourceRange Range) {
QualType PointeeType = T->getPointeeType();
manglePointerCVQualifiers(Quals);
manglePointerExtQualifiers(Quals, PointeeType);
Out << "_E";
mangleFunctionType(PointeeType->castAs<FunctionProtoType>());
}
void MicrosoftCXXNameMangler::mangleType(const InjectedClassNameType *,
Qualifiers, SourceRange) {
llvm_unreachable("Cannot mangle injected class name type.");
}
void MicrosoftCXXNameMangler::mangleType(const TemplateSpecializationType *T,
Qualifiers, SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this template specialization type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const DependentNameType *T, Qualifiers,
SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this dependent name type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(
const DependentTemplateSpecializationType *T, Qualifiers,
SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this dependent template specialization type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const PackExpansionType *T, Qualifiers,
SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this pack expansion yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const TypeOfType *T, Qualifiers,
SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this typeof(type) yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const TypeOfExprType *T, Qualifiers,
SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this typeof(expression) yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const DecltypeType *T, Qualifiers,
SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this decltype() yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const UnaryTransformType *T,
Qualifiers, SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this unary transform type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const AutoType *T, Qualifiers,
SourceRange Range) {
assert(T->getDeducedType().isNull() && "expecting a dependent type!");
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this 'auto' type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(
const DeducedTemplateSpecializationType *T, Qualifiers, SourceRange Range) {
assert(T->getDeducedType().isNull() && "expecting a dependent type!");
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this deduced class template specialization type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftCXXNameMangler::mangleType(const AtomicType *T, Qualifiers,
SourceRange Range) {
QualType ValueType = T->getValueType();
llvm::SmallString<64> TemplateMangling;
llvm::raw_svector_ostream Stream(TemplateMangling);
MicrosoftCXXNameMangler Extra(Context, Stream);
Stream << "?$";
Extra.mangleSourceName("_Atomic");
Extra.mangleType(ValueType, Range, QMM_Escape);
mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__clang"});
}
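// Illustrative sketch: _Atomic(int) is mangled as if it were the artificial
// struct __clang::_Atomic<int>, i.e. roughly "U?$_Atomic@H@__clang@@", where
// 'H' is the MSVC code for int.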
void MicrosoftCXXNameMangler::mangleType(const PipeType *T, Qualifiers,
SourceRange Range) {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot mangle this OpenCL pipe type yet");
Diags.Report(Range.getBegin(), DiagID)
<< Range;
}
void MicrosoftMangleContextImpl::mangleCXXName(const NamedDecl *D,
raw_ostream &Out) {
assert((isa<FunctionDecl>(D) || isa<VarDecl>(D)) &&
"Invalid mangleName() call, argument is not a variable or function!");
assert(!isa<CXXConstructorDecl>(D) && !isa<CXXDestructorDecl>(D) &&
"Invalid mangleName() call on 'structor decl!");
PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
getASTContext().getSourceManager(),
"Mangling declaration");
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
return Mangler.mangle(D);
}
// <this-adjustment> ::= <no-adjustment> | <static-adjustment> |
// <virtual-adjustment>
// <no-adjustment> ::= A # private near
// ::= B # private far
// ::= I # protected near
// ::= J # protected far
// ::= Q # public near
// ::= R # public far
// <static-adjustment> ::= G <static-offset> # private near
// ::= H <static-offset> # private far
// ::= O <static-offset> # protected near
// ::= P <static-offset> # protected far
// ::= W <static-offset> # public near
// ::= X <static-offset> # public far
// <virtual-adjustment> ::= $0 <virtual-shift> <static-offset> # private near
// ::= $1 <virtual-shift> <static-offset> # private far
// ::= $2 <virtual-shift> <static-offset> # protected near
// ::= $3 <virtual-shift> <static-offset> # protected far
// ::= $4 <virtual-shift> <static-offset> # public near
// ::= $5 <virtual-shift> <static-offset> # public far
// <virtual-shift> ::= <vtordisp-shift> | <vtordispex-shift>
// <vtordisp-shift> ::= <offset-to-vtordisp>
// <vtordispex-shift> ::= <offset-to-vbptr> <vbase-offset-offset>
// <offset-to-vtordisp>
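// Illustrative examples of the grammar above (sketch): a thunk with no
// this-adjustment on a public method mangles as just 'Q' (public near); a
// static this-adjustment on a public method mangles as 'W' followed by the
// encoded offset; and a public vtordisp adjustment mangles as "$4" followed
// by the encoded vtordisp shift and static offset.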
static void mangleThunkThisAdjustment(AccessSpecifier AS,
const ThisAdjustment &Adjustment,
MicrosoftCXXNameMangler &Mangler,
raw_ostream &Out) {
if (!Adjustment.Virtual.isEmpty()) {
Out << '$';
char AccessSpec;
switch (AS) {
case AS_none:
llvm_unreachable("Unsupported access specifier");
case AS_private:
AccessSpec = '0';
break;
case AS_protected:
AccessSpec = '2';
break;
case AS_public:
AccessSpec = '4';
}
if (Adjustment.Virtual.Microsoft.VBPtrOffset) {
Out << 'R' << AccessSpec;
Mangler.mangleNumber(
static_cast<uint32_t>(Adjustment.Virtual.Microsoft.VBPtrOffset));
Mangler.mangleNumber(
static_cast<uint32_t>(Adjustment.Virtual.Microsoft.VBOffsetOffset));
Mangler.mangleNumber(
static_cast<uint32_t>(Adjustment.Virtual.Microsoft.VtordispOffset));
Mangler.mangleNumber(static_cast<uint32_t>(Adjustment.NonVirtual));
} else {
Out << AccessSpec;
Mangler.mangleNumber(
static_cast<uint32_t>(Adjustment.Virtual.Microsoft.VtordispOffset));
Mangler.mangleNumber(-static_cast<uint32_t>(Adjustment.NonVirtual));
}
} else if (Adjustment.NonVirtual != 0) {
switch (AS) {
case AS_none:
llvm_unreachable("Unsupported access specifier");
case AS_private:
Out << 'G';
break;
case AS_protected:
Out << 'O';
break;
case AS_public:
Out << 'W';
}
Mangler.mangleNumber(-static_cast<uint32_t>(Adjustment.NonVirtual));
} else {
switch (AS) {
case AS_none:
llvm_unreachable("Unsupported access specifier");
case AS_private:
Out << 'A';
break;
case AS_protected:
Out << 'I';
break;
case AS_public:
Out << 'Q';
}
}
}
void MicrosoftMangleContextImpl::mangleVirtualMemPtrThunk(
const CXXMethodDecl *MD, const MethodVFTableLocation &ML,
raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << '?';
Mangler.mangleVirtualMemPtrThunk(MD, ML);
}
void MicrosoftMangleContextImpl::mangleThunk(const CXXMethodDecl *MD,
const ThunkInfo &Thunk,
raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << '?';
Mangler.mangleName(MD);
// Usually the thunk uses the access specifier of the new method, but if this
// is a covariant return thunk, then MSVC always uses the public access
// specifier, and we do the same.
AccessSpecifier AS = Thunk.Return.isEmpty() ? MD->getAccess() : AS_public;
mangleThunkThisAdjustment(AS, Thunk.This, Mangler, MHO);
if (!Thunk.Return.isEmpty())
assert(Thunk.Method != nullptr &&
"Thunk info should hold the overridee decl");
const CXXMethodDecl *DeclForFPT = Thunk.Method ? Thunk.Method : MD;
Mangler.mangleFunctionType(
DeclForFPT->getType()->castAs<FunctionProtoType>(), MD);
}
void MicrosoftMangleContextImpl::mangleCXXDtorThunk(
const CXXDestructorDecl *DD, CXXDtorType Type,
const ThisAdjustment &Adjustment, raw_ostream &Out) {
// FIXME: Actually, the dtor thunk should be emitted for vector deleting
// dtors rather than scalar deleting dtors. Just use the vector deleting dtor
// mangling manually until we support both deleting dtor types.
assert(Type == Dtor_Deleting);
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO, DD, Type);
Mangler.getStream() << "??_E";
Mangler.mangleName(DD->getParent());
mangleThunkThisAdjustment(DD->getAccess(), Adjustment, Mangler, MHO);
Mangler.mangleFunctionType(DD->getType()->castAs<FunctionProtoType>(), DD);
}
void MicrosoftMangleContextImpl::mangleCXXVFTable(
const CXXRecordDecl *Derived, ArrayRef<const CXXRecordDecl *> BasePath,
raw_ostream &Out) {
// <mangled-name> ::= ?_7 <class-name> <storage-class>
// <cvr-qualifiers> [<name>] @
// NOTE: <cvr-qualifiers> here is always 'B' (const). <storage-class>
// is always '6' for vftables.
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
if (Derived->hasAttr<DLLImportAttr>())
Mangler.getStream() << "??_S";
else
Mangler.getStream() << "??_7";
Mangler.mangleName(Derived);
Mangler.getStream() << "6B"; // '6' for vftable, 'B' for const.
for (const CXXRecordDecl *RD : BasePath)
Mangler.mangleName(RD);
Mangler.getStream() << '@';
}
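// For example, the vftable for a class Derived with an empty base path
// mangles as "??_7Derived@@6B@"; when Derived is dllimported, the "??_S"
// prefix yields "??_SDerived@@6B@" instead.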
void MicrosoftMangleContextImpl::mangleCXXVBTable(
const CXXRecordDecl *Derived, ArrayRef<const CXXRecordDecl *> BasePath,
raw_ostream &Out) {
// <mangled-name> ::= ?_8 <class-name> <storage-class>
// <cvr-qualifiers> [<name>] @
// NOTE: <cvr-qualifiers> here is always 'B' (const). <storage-class>
// is always '7' for vbtables.
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "??_8";
Mangler.mangleName(Derived);
Mangler.getStream() << "7B"; // '7' for vbtable, 'B' for const.
for (const CXXRecordDecl *RD : BasePath)
Mangler.mangleName(RD);
Mangler.getStream() << '@';
}
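// For example, the vbtable for a class Derived with an empty base path
// mangles as "??_8Derived@@7B@".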
void MicrosoftMangleContextImpl::mangleCXXRTTI(QualType T, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "??_R0";
Mangler.mangleType(T, SourceRange(), MicrosoftCXXNameMangler::QMM_Result);
Mangler.getStream() << "@8";
}
void MicrosoftMangleContextImpl::mangleCXXRTTIName(QualType T,
raw_ostream &Out) {
MicrosoftCXXNameMangler Mangler(*this, Out);
Mangler.getStream() << '.';
Mangler.mangleType(T, SourceRange(), MicrosoftCXXNameMangler::QMM_Result);
}
void MicrosoftMangleContextImpl::mangleCXXVirtualDisplacementMap(
const CXXRecordDecl *SrcRD, const CXXRecordDecl *DstRD, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "??_K";
Mangler.mangleName(SrcRD);
Mangler.getStream() << "$C";
Mangler.mangleName(DstRD);
}
void MicrosoftMangleContextImpl::mangleCXXThrowInfo(QualType T, bool IsConst,
bool IsVolatile,
bool IsUnaligned,
uint32_t NumEntries,
raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "_TI";
if (IsConst)
Mangler.getStream() << 'C';
if (IsVolatile)
Mangler.getStream() << 'V';
if (IsUnaligned)
Mangler.getStream() << 'U';
Mangler.getStream() << NumEntries;
Mangler.mangleType(T, SourceRange(), MicrosoftCXXNameMangler::QMM_Result);
}
void MicrosoftMangleContextImpl::mangleCXXCatchableTypeArray(
QualType T, uint32_t NumEntries, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "_CTA";
Mangler.getStream() << NumEntries;
Mangler.mangleType(T, SourceRange(), MicrosoftCXXNameMangler::QMM_Result);
}
void MicrosoftMangleContextImpl::mangleCXXCatchableType(
QualType T, const CXXConstructorDecl *CD, CXXCtorType CT, uint32_t Size,
uint32_t NVOffset, int32_t VBPtrOffset, uint32_t VBIndex,
raw_ostream &Out) {
MicrosoftCXXNameMangler Mangler(*this, Out);
Mangler.getStream() << "_CT";
llvm::SmallString<64> RTTIMangling;
{
llvm::raw_svector_ostream Stream(RTTIMangling);
msvc_hashing_ostream MHO(Stream);
mangleCXXRTTI(T, MHO);
}
Mangler.getStream() << RTTIMangling;
// VS2015 CTP6 omits the copy-constructor in the mangled name. This name is,
// in fact, superfluous, but I'm not sure the change was made consciously.
llvm::SmallString<64> CopyCtorMangling;
if (!getASTContext().getLangOpts().isCompatibleWithMSVC(
LangOptions::MSVC2015) &&
CD) {
llvm::raw_svector_ostream Stream(CopyCtorMangling);
msvc_hashing_ostream MHO(Stream);
mangleCXXCtor(CD, CT, MHO);
}
Mangler.getStream() << CopyCtorMangling;
Mangler.getStream() << Size;
if (VBPtrOffset == -1) {
if (NVOffset) {
Mangler.getStream() << NVOffset;
}
} else {
Mangler.getStream() << NVOffset;
Mangler.getStream() << VBPtrOffset;
Mangler.getStream() << VBIndex;
}
}
void MicrosoftMangleContextImpl::mangleCXXRTTIBaseClassDescriptor(
const CXXRecordDecl *Derived, uint32_t NVOffset, int32_t VBPtrOffset,
uint32_t VBTableOffset, uint32_t Flags, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "??_R1";
Mangler.mangleNumber(NVOffset);
Mangler.mangleNumber(VBPtrOffset);
Mangler.mangleNumber(VBTableOffset);
Mangler.mangleNumber(Flags);
Mangler.mangleName(Derived);
Mangler.getStream() << "8";
}
void MicrosoftMangleContextImpl::mangleCXXRTTIBaseClassArray(
const CXXRecordDecl *Derived, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "??_R2";
Mangler.mangleName(Derived);
Mangler.getStream() << "8";
}
void MicrosoftMangleContextImpl::mangleCXXRTTIClassHierarchyDescriptor(
const CXXRecordDecl *Derived, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "??_R3";
Mangler.mangleName(Derived);
Mangler.getStream() << "8";
}
void MicrosoftMangleContextImpl::mangleCXXRTTICompleteObjectLocator(
const CXXRecordDecl *Derived, ArrayRef<const CXXRecordDecl *> BasePath,
raw_ostream &Out) {
// <mangled-name> ::= ?_R4 <class-name> <storage-class>
// <cvr-qualifiers> [<name>] @
// NOTE: <cvr-qualifiers> here is always 'B' (const). <storage-class>
// is always '6' for vftables.
llvm::SmallString<64> VFTableMangling;
llvm::raw_svector_ostream Stream(VFTableMangling);
mangleCXXVFTable(Derived, BasePath, Stream);
if (VFTableMangling.startswith("??@")) {
assert(VFTableMangling.endswith("@"));
Out << VFTableMangling << "??_R4@";
return;
}
assert(VFTableMangling.startswith("??_7") ||
VFTableMangling.startswith("??_S"));
Out << "??_R4" << StringRef(VFTableMangling).drop_front(4);
}
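// For example, starting from the vftable mangling "??_7Derived@@6B@", the
// prefix swap above produces the locator name "??_R4Derived@@6B@".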
void MicrosoftMangleContextImpl::mangleSEHFilterExpression(
const NamedDecl *EnclosingDecl, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
// The function body is in the same comdat as the function with the handler,
// so the numbering here doesn't have to be the same across TUs.
//
// <mangled-name> ::= ?filt$ <filter-number> @0
Mangler.getStream() << "?filt$" << SEHFilterIds[EnclosingDecl]++ << "@0@";
Mangler.mangleName(EnclosingDecl);
}
void MicrosoftMangleContextImpl::mangleSEHFinallyBlock(
const NamedDecl *EnclosingDecl, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
// The function body is in the same comdat as the function with the handler,
// so the numbering here doesn't have to be the same across TUs.
//
// <mangled-name> ::= ?fin$ <filter-number> @0
Mangler.getStream() << "?fin$" << SEHFinallyIds[EnclosingDecl]++ << "@0@";
Mangler.mangleName(EnclosingDecl);
}
void MicrosoftMangleContextImpl::mangleTypeName(QualType T, raw_ostream &Out) {
// This is just a made-up unique string for the purposes of TBAA. undname
// does *not* know how to demangle it.
MicrosoftCXXNameMangler Mangler(*this, Out);
Mangler.getStream() << '?';
Mangler.mangleType(T, SourceRange());
}
void MicrosoftMangleContextImpl::mangleCXXCtor(const CXXConstructorDecl *D,
CXXCtorType Type,
raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler mangler(*this, MHO, D, Type);
mangler.mangle(D);
}
void MicrosoftMangleContextImpl::mangleCXXDtor(const CXXDestructorDecl *D,
CXXDtorType Type,
raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler mangler(*this, MHO, D, Type);
mangler.mangle(D);
}
void MicrosoftMangleContextImpl::mangleReferenceTemporary(
const VarDecl *VD, unsigned ManglingNumber, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "?$RT" << ManglingNumber << '@';
Mangler.mangle(VD, "");
}
void MicrosoftMangleContextImpl::mangleThreadSafeStaticGuardVariable(
const VarDecl *VD, unsigned GuardNum, raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "?$TSS" << GuardNum << '@';
Mangler.mangleNestedName(VD);
Mangler.getStream() << "@4HA";
}
void MicrosoftMangleContextImpl::mangleStaticGuardVariable(const VarDecl *VD,
raw_ostream &Out) {
// <guard-name> ::= ?_B <postfix> @5 <scope-depth>
// ::= ?__J <postfix> @5 <scope-depth>
// ::= ?$S <guard-num> @ <postfix> @4IA
// The first mangling is what MSVC uses to guard static locals in inline
// functions. It uses a different mangling in external functions to support
// guarding more than 32 variables. MSVC rejects inline functions with more
// than 32 static locals. We don't fully implement the second mangling
// because those guards are not externally visible, and instead use LLVM's
// default renaming when creating a new guard variable.
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
bool Visible = VD->isExternallyVisible();
if (Visible) {
Mangler.getStream() << (VD->getTLSKind() ? "??__J" : "??_B");
} else {
Mangler.getStream() << "?$S1@";
}
unsigned ScopeDepth = 0;
if (Visible && !getNextDiscriminator(VD, ScopeDepth))
// If we do not have a discriminator and are emitting a guard variable for
// use at global scope, then mangling the nested name will not be enough to
// remove ambiguities.
Mangler.mangle(VD, "");
else
Mangler.mangleNestedName(VD);
Mangler.getStream() << (Visible ? "@5" : "@4IA");
if (ScopeDepth)
Mangler.mangleNumber(ScopeDepth);
}
void MicrosoftMangleContextImpl::mangleInitFiniStub(const VarDecl *D,
char CharCode,
raw_ostream &Out) {
msvc_hashing_ostream MHO(Out);
MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "??__" << CharCode;
if (D->isStaticDataMember()) {
Mangler.getStream() << '?';
Mangler.mangleName(D);
Mangler.mangleVariableEncoding(D);
Mangler.getStream() << "@@";
} else {
Mangler.mangleName(D);
}
// This is the function class mangling. These stubs are global, non-variadic,
// cdecl functions that return void and take no args.
Mangler.getStream() << "YAXXZ";
}
void MicrosoftMangleContextImpl::mangleDynamicInitializer(const VarDecl *D,
raw_ostream &Out) {
// <initializer-name> ::= ?__E <name> YAXXZ
mangleInitFiniStub(D, 'E', Out);
}
void
MicrosoftMangleContextImpl::mangleDynamicAtExitDestructor(const VarDecl *D,
raw_ostream &Out) {
// <destructor-name> ::= ?__F <name> YAXXZ
mangleInitFiniStub(D, 'F', Out);
}
void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL,
raw_ostream &Out) {
// <char-type> ::= 0 # char, char16_t, char32_t
// # (little endian char data in mangling)
// ::= 1 # wchar_t (big endian char data in mangling)
//
// <literal-length> ::= <non-negative integer> # the length of the literal
//
// <encoded-crc> ::= <hex digit>+ @ # crc of the literal including
// # trailing null bytes
//
// <encoded-string> ::= <simple character> # uninteresting character
// ::= '?$' <hex digit> <hex digit> # these two nibbles
// # encode the byte for the
// # character
// ::= '?' [a-z] # \xe1 - \xfa
// ::= '?' [A-Z] # \xc1 - \xda
// ::= '?' [0-9] # [,/\:. \n\t'-]
//
// <literal> ::= '??_C@_' <char-type> <literal-length> <encoded-crc>
// <encoded-string> '@'
MicrosoftCXXNameMangler Mangler(*this, Out);
Mangler.getStream() << "??_C@_";
// The actual string length might differ from that of the string literal in
// cases like:
// char foo[3] = "foobar";
// char bar[42] = "foobar";
// where it is truncated or zero-padded to fit the array. That array length is
// the one used for mangling, and any trailing null bytes must be mangled too.
unsigned StringLength = getASTContext()
.getAsConstantArrayType(SL->getType())
->getSize()
.getZExtValue();
unsigned StringByteLength = StringLength * SL->getCharByteWidth();
// <char-type>: The "kind" of string literal is encoded into the mangled name.
if (SL->isWide())
Mangler.getStream() << '1';
else
Mangler.getStream() << '0';
// <literal-length>: The next part of the mangled name consists of the length
// of the string in bytes.
Mangler.mangleNumber(StringByteLength);
auto GetLittleEndianByte = [&SL](unsigned Index) {
unsigned CharByteWidth = SL->getCharByteWidth();
if (Index / CharByteWidth >= SL->getLength())
return static_cast<char>(0);
uint32_t CodeUnit = SL->getCodeUnit(Index / CharByteWidth);
unsigned OffsetInCodeUnit = Index % CharByteWidth;
return static_cast<char>((CodeUnit >> (8 * OffsetInCodeUnit)) & 0xff);
};
auto GetBigEndianByte = [&SL](unsigned Index) {
unsigned CharByteWidth = SL->getCharByteWidth();
if (Index / CharByteWidth >= SL->getLength())
return static_cast<char>(0);
uint32_t CodeUnit = SL->getCodeUnit(Index / CharByteWidth);
unsigned OffsetInCodeUnit = (CharByteWidth - 1) - (Index % CharByteWidth);
return static_cast<char>((CodeUnit >> (8 * OffsetInCodeUnit)) & 0xff);
};
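// For example, for a two-byte code unit 0x0041 ('A' in UTF-16),
// GetLittleEndianByte yields 0x41 then 0x00 for indices 0 and 1, while
// GetBigEndianByte yields 0x00 then 0x41; indices past the literal's length
// read as the implicit trailing null bytes.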
// CRC all the bytes of the StringLiteral.
llvm::JamCRC JC;
for (unsigned I = 0, E = StringByteLength; I != E; ++I)
JC.update(GetLittleEndianByte(I));
// <encoded-crc>: The CRC is encoded utilizing the standard number mangling
// scheme.
Mangler.mangleNumber(JC.getCRC());
// <encoded-string>: The mangled name also contains the first 32 bytes
// (including null-terminator bytes) of the encoded StringLiteral.
// Each character is encoded by splitting them into bytes and then encoding
// the constituent bytes.
auto MangleByte = [&Mangler](char Byte) {
// There are five different manglings for characters:
// - [a-zA-Z0-9_$]: A one-to-one mapping.
// - ?[a-z]: The range from \xe1 to \xfa.
// - ?[A-Z]: The range from \xc1 to \xda.
// - ?[0-9]: The set of [,/\:. \n\t'-].
// - ?$XX: A fallback which maps nibbles.
if (isIdentifierBody(Byte, /*AllowDollar=*/true)) {
Mangler.getStream() << Byte;
} else if (isLetter(Byte & 0x7f)) {
Mangler.getStream() << '?' << static_cast<char>(Byte & 0x7f);
} else {
const char SpecialChars[] = {',', '/', '\\', ':', '.',
' ', '\n', '\t', '\'', '-'};
const char *Pos =
std::find(std::begin(SpecialChars), std::end(SpecialChars), Byte);
if (Pos != std::end(SpecialChars)) {
Mangler.getStream() << '?' << (Pos - std::begin(SpecialChars));
} else {
Mangler.getStream() << "?$";
Mangler.getStream() << static_cast<char>('A' + ((Byte >> 4) & 0xf));
Mangler.getStream() << static_cast<char>('A' + (Byte & 0xf));
}
}
};
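// For example, 'a' maps to itself, ',' maps to "?0" (index 0 in
// SpecialChars), 0xE1 maps to "?a" (0xE1 & 0x7f == 'a'), and a byte like
// 0x1F takes the fallback "?$BP" ('B' and 'P' encode the nibbles 1 and 15).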
// Enforce our 32-byte maximum, except wchar_t strings, which get 32
// characters (64 bytes) instead.
unsigned MaxBytesToMangle = SL->isWide() ? 64U : 32U;
unsigned NumBytesToMangle = std::min(MaxBytesToMangle, StringByteLength);
for (unsigned I = 0; I != NumBytesToMangle; ++I) {
if (SL->isWide())
MangleByte(GetBigEndianByte(I));
else
MangleByte(GetLittleEndianByte(I));
}
Mangler.getStream() << '@';
}
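// Putting it together (illustrative): the narrow literal "ab" occupies three
// bytes with its terminator, so it mangles as "??_C@_02" followed by the
// JamCRC digits and "@", then "ab?$AA@": '0' is the char type, "2" the
// MSVC-encoded length 3, and "?$AA" the encoded NUL.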
MicrosoftMangleContext *
MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) {
return new MicrosoftMangleContextImpl(Context, Diags);
}
Index: head/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp (revision 349792)
+++ head/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp (revision 349793)
@@ -1,4505 +1,4497 @@
//===--- CGDebugInfo.cpp - Emit Debug Information for a Module ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This coordinates the debug information generation while generating code.
//
//===----------------------------------------------------------------------===//
#include "CGDebugInfo.h"
#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Version.h"
#include "clang/Frontend/FrontendOptions.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
using namespace clang;
using namespace clang::CodeGen;
static uint32_t getTypeAlignIfRequired(const Type *Ty, const ASTContext &Ctx) {
auto TI = Ctx.getTypeInfo(Ty);
return TI.AlignIsRequired ? TI.Align : 0;
}
static uint32_t getTypeAlignIfRequired(QualType Ty, const ASTContext &Ctx) {
return getTypeAlignIfRequired(Ty.getTypePtr(), Ctx);
}
static uint32_t getDeclAlignIfRequired(const Decl *D, const ASTContext &Ctx) {
return D->hasAttr<AlignedAttr>() ? D->getMaxAlignment() : 0;
}
CGDebugInfo::CGDebugInfo(CodeGenModule &CGM)
: CGM(CGM), DebugKind(CGM.getCodeGenOpts().getDebugInfo()),
DebugTypeExtRefs(CGM.getCodeGenOpts().DebugTypeExtRefs),
DBuilder(CGM.getModule()) {
for (const auto &KV : CGM.getCodeGenOpts().DebugPrefixMap)
DebugPrefixMap[KV.first] = KV.second;
CreateCompileUnit();
}
CGDebugInfo::~CGDebugInfo() {
assert(LexicalBlockStack.empty() &&
"Region stack mismatch, stack not empty!");
}
ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF,
SourceLocation TemporaryLocation)
: CGF(&CGF) {
init(TemporaryLocation);
}
ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF,
bool DefaultToEmpty,
SourceLocation TemporaryLocation)
: CGF(&CGF) {
init(TemporaryLocation, DefaultToEmpty);
}
void ApplyDebugLocation::init(SourceLocation TemporaryLocation,
bool DefaultToEmpty) {
auto *DI = CGF->getDebugInfo();
if (!DI) {
CGF = nullptr;
return;
}
OriginalLocation = CGF->Builder.getCurrentDebugLocation();
if (OriginalLocation && !DI->CGM.getExpressionLocationsEnabled())
return;
if (TemporaryLocation.isValid()) {
DI->EmitLocation(CGF->Builder, TemporaryLocation);
return;
}
if (DefaultToEmpty) {
CGF->Builder.SetCurrentDebugLocation(llvm::DebugLoc());
return;
}
// Construct a location that has a valid scope, but no line info.
assert(!DI->LexicalBlockStack.empty());
CGF->Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
0, 0, DI->LexicalBlockStack.back(), DI->getInlinedAt()));
}
ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, const Expr *E)
: CGF(&CGF) {
init(E->getExprLoc());
}
ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, llvm::DebugLoc Loc)
: CGF(&CGF) {
if (!CGF.getDebugInfo()) {
this->CGF = nullptr;
return;
}
OriginalLocation = CGF.Builder.getCurrentDebugLocation();
if (Loc)
CGF.Builder.SetCurrentDebugLocation(std::move(Loc));
}
ApplyDebugLocation::~ApplyDebugLocation() {
// Query CGF so the location isn't overwritten when location updates are
// temporarily disabled (for C++ default function arguments)
if (CGF)
CGF->Builder.SetCurrentDebugLocation(std::move(OriginalLocation));
}
ApplyInlineDebugLocation::ApplyInlineDebugLocation(CodeGenFunction &CGF,
GlobalDecl InlinedFn)
: CGF(&CGF) {
if (!CGF.getDebugInfo()) {
this->CGF = nullptr;
return;
}
auto &DI = *CGF.getDebugInfo();
SavedLocation = DI.getLocation();
assert((DI.getInlinedAt() ==
CGF.Builder.getCurrentDebugLocation()->getInlinedAt()) &&
"CGDebugInfo and IRBuilder are out of sync");
DI.EmitInlineFunctionStart(CGF.Builder, InlinedFn);
}
ApplyInlineDebugLocation::~ApplyInlineDebugLocation() {
if (!CGF)
return;
auto &DI = *CGF->getDebugInfo();
DI.EmitInlineFunctionEnd(CGF->Builder);
DI.EmitLocation(CGF->Builder, SavedLocation);
}
void CGDebugInfo::setLocation(SourceLocation Loc) {
// If the new location isn't valid, return.
if (Loc.isInvalid())
return;
CurLoc = CGM.getContext().getSourceManager().getExpansionLoc(Loc);
// If we've changed files in the middle of a lexical scope, go ahead and
// create a new lexical scope with a file node if it's different from the one
// already in the scope.
if (LexicalBlockStack.empty())
return;
SourceManager &SM = CGM.getContext().getSourceManager();
auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc);
if (PCLoc.isInvalid() || Scope->getFile() == getOrCreateFile(CurLoc))
return;
if (auto *LBF = dyn_cast<llvm::DILexicalBlockFile>(Scope)) {
LexicalBlockStack.pop_back();
LexicalBlockStack.emplace_back(DBuilder.createLexicalBlockFile(
LBF->getScope(), getOrCreateFile(CurLoc)));
} else if (isa<llvm::DILexicalBlock>(Scope) ||
isa<llvm::DISubprogram>(Scope)) {
LexicalBlockStack.pop_back();
LexicalBlockStack.emplace_back(
DBuilder.createLexicalBlockFile(Scope, getOrCreateFile(CurLoc)));
}
}
llvm::DIScope *CGDebugInfo::getDeclContextDescriptor(const Decl *D) {
llvm::DIScope *Mod = getParentModuleOrNull(D);
return getContextDescriptor(cast<Decl>(D->getDeclContext()),
Mod ? Mod : TheCU);
}
llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context,
llvm::DIScope *Default) {
if (!Context)
return Default;
auto I = RegionMap.find(Context);
if (I != RegionMap.end()) {
llvm::Metadata *V = I->second;
return dyn_cast_or_null<llvm::DIScope>(V);
}
// Check namespace.
if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context))
return getOrCreateNamespace(NSDecl);
if (const auto *RDecl = dyn_cast<RecordDecl>(Context))
if (!RDecl->isDependentType())
return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl),
TheCU->getFile());
return Default;
}
PrintingPolicy CGDebugInfo::getPrintingPolicy() const {
PrintingPolicy PP = CGM.getContext().getPrintingPolicy();
// If we're emitting codeview, it's important to try to match MSVC's naming so
// that visualizers written for MSVC will trigger for our class names. In
// particular, we can't have spaces between arguments of standard templates
// like basic_string and vector.
if (CGM.getCodeGenOpts().EmitCodeView)
PP.MSVCFormatting = true;
// Apply -fdebug-prefix-map.
PP.RemapFilePaths = true;
PP.remapPath = [this](StringRef Path) { return remapDIPath(Path); };
return PP;
}
StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) {
assert(FD && "Invalid FunctionDecl!");
IdentifierInfo *FII = FD->getIdentifier();
FunctionTemplateSpecializationInfo *Info =
FD->getTemplateSpecializationInfo();
// Emit the unqualified name in normal operation. LLVM and the debugger can
// compute the fully qualified name from the scope chain. If we're only
// emitting line table info, there won't be any scope chains, so emit the
// fully qualified name here so that stack traces are more accurate.
// FIXME: Do this when emitting DWARF as well as when emitting CodeView after
// evaluating the size impact.
bool UseQualifiedName = DebugKind == codegenoptions::DebugLineTablesOnly &&
CGM.getCodeGenOpts().EmitCodeView;
if (!Info && FII && !UseQualifiedName)
return FII->getName();
SmallString<128> NS;
llvm::raw_svector_ostream OS(NS);
if (!UseQualifiedName)
FD->printName(OS);
else
FD->printQualifiedName(OS, getPrintingPolicy());
// Add any template specialization args.
if (Info) {
const TemplateArgumentList *TArgs = Info->TemplateArguments;
printTemplateArgumentList(OS, TArgs->asArray(), getPrintingPolicy());
}
// Copy this name on the side and use its reference.
return internString(OS.str());
}
StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) {
SmallString<256> MethodName;
llvm::raw_svector_ostream OS(MethodName);
OS << (OMD->isInstanceMethod() ? '-' : '+') << '[';
const DeclContext *DC = OMD->getDeclContext();
if (const auto *OID = dyn_cast<ObjCImplementationDecl>(DC)) {
OS << OID->getName();
} else if (const auto *OID = dyn_cast<ObjCInterfaceDecl>(DC)) {
OS << OID->getName();
} else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(DC)) {
if (OC->IsClassExtension()) {
OS << OC->getClassInterface()->getName();
} else {
OS << OC->getClassInterface()->getName() << '('
<< OC->getIdentifier()->getNameStart() << ')';
}
} else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) {
OS << OCD->getClassInterface()->getName() << '(' << OCD->getName() << ')';
} else if (isa<ObjCProtocolDecl>(DC)) {
// We can extract the type of the class from the self pointer.
if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) {
QualType ClassTy =
cast<ObjCObjectPointerType>(SelfDecl->getType())->getPointeeType();
ClassTy.print(OS, PrintingPolicy(LangOptions()));
}
}
OS << ' ' << OMD->getSelector().getAsString() << ']';
return internString(OS.str());
}
StringRef CGDebugInfo::getSelectorName(Selector S) {
return internString(S.getAsString());
}
StringRef CGDebugInfo::getClassName(const RecordDecl *RD) {
if (isa<ClassTemplateSpecializationDecl>(RD)) {
SmallString<128> Name;
llvm::raw_svector_ostream OS(Name);
RD->getNameForDiagnostic(OS, getPrintingPolicy(),
/*Qualified*/ false);
// Copy this name on the side and use its reference.
return internString(Name);
}
// Quick optimization to avoid having to intern strings that are already
// stored reliably elsewhere.
if (const IdentifierInfo *II = RD->getIdentifier())
return II->getName();
// The CodeView printer in LLVM wants to see the names of unnamed types: they
// are used to reconstruct the fully qualified type names.
if (CGM.getCodeGenOpts().EmitCodeView) {
if (const TypedefNameDecl *D = RD->getTypedefNameForAnonDecl()) {
assert(RD->getDeclContext() == D->getDeclContext() &&
"Typedef should not be in another decl context!");
assert(D->getDeclName().getAsIdentifierInfo() &&
"Typedef was not named!");
return D->getDeclName().getAsIdentifierInfo()->getName();
}
if (CGM.getLangOpts().CPlusPlus) {
StringRef Name;
ASTContext &Context = CGM.getContext();
if (const DeclaratorDecl *DD = Context.getDeclaratorForUnnamedTagDecl(RD))
// Anonymous types without a name for linkage purposes have their
// declarator mangled in if they have one.
Name = DD->getName();
else if (const TypedefNameDecl *TND =
Context.getTypedefNameForUnnamedTagDecl(RD))
// Anonymous types without a name for linkage purposes have their
// associated typedef mangled in if they have one.
Name = TND->getName();
if (!Name.empty()) {
SmallString<256> UnnamedType("<unnamed-type-");
UnnamedType += Name;
UnnamedType += '>';
return internString(UnnamedType);
}
}
}
return StringRef();
}
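// For example, under CodeView a declaration like "struct { int x; } g;" has
// no tag or typedef name, so the declarator's name is mangled in and the
// class name becomes "<unnamed-type-g>".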
Optional<llvm::DIFile::ChecksumKind>
CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const {
Checksum.clear();
if (!CGM.getCodeGenOpts().EmitCodeView &&
CGM.getCodeGenOpts().DwarfVersion < 5)
return None;
SourceManager &SM = CGM.getContext().getSourceManager();
bool Invalid;
llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid);
if (Invalid)
return None;
llvm::MD5 Hash;
llvm::MD5::MD5Result Result;
Hash.update(MemBuffer->getBuffer());
Hash.final(Result);
Hash.stringifyResult(Result, Checksum);
return llvm::DIFile::CSK_MD5;
}
Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
FileID FID) {
if (!CGM.getCodeGenOpts().EmbedSource)
return None;
bool SourceInvalid = false;
StringRef Source = SM.getBufferData(FID, &SourceInvalid);
if (SourceInvalid)
return None;
return Source;
}
llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
if (!Loc.isValid())
// If the location is not valid, use the main input file.
return TheCU->getFile();
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
StringRef FileName = PLoc.getFilename();
if (PLoc.isInvalid() || FileName.empty())
// If the location is not valid, use the main input file.
return TheCU->getFile();
// Cache the results.
auto It = DIFileCache.find(FileName.data());
if (It != DIFileCache.end()) {
// Verify that the information still exists.
if (llvm::Metadata *V = It->second)
return cast<llvm::DIFile>(V);
}
SmallString<32> Checksum;
Optional<llvm::DIFile::ChecksumKind> CSKind =
computeChecksum(SM.getFileID(Loc), Checksum);
Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc)));
}
llvm::DIFile *
CGDebugInfo::createFile(StringRef FileName,
Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
Optional<StringRef> Source) {
StringRef Dir;
StringRef File;
std::string RemappedFile = remapDIPath(FileName);
std::string CurDir = remapDIPath(getCurrentDirname());
SmallString<128> DirBuf;
SmallString<128> FileBuf;
if (llvm::sys::path::is_absolute(RemappedFile)) {
// Strip the common prefix (if it is more than just "/") from current
// directory and FileName for a more space-efficient encoding.
auto FileIt = llvm::sys::path::begin(RemappedFile);
auto FileE = llvm::sys::path::end(RemappedFile);
auto CurDirIt = llvm::sys::path::begin(CurDir);
auto CurDirE = llvm::sys::path::end(CurDir);
for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt)
llvm::sys::path::append(DirBuf, *CurDirIt);
if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) {
// The common prefix is only the root; stripping it would make
// LLVM diagnostic locations more confusing.
Dir = {};
File = RemappedFile;
} else {
for (; FileIt != FileE; ++FileIt)
llvm::sys::path::append(FileBuf, *FileIt);
Dir = DirBuf;
File = FileBuf;
}
} else {
Dir = CurDir;
File = RemappedFile;
}
llvm::DIFile *F = DBuilder.createFile(File, Dir, CSInfo, Source);
DIFileCache[FileName.data()].reset(F);
return F;
}
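// For example, with a current directory of "/home/user/build" and the file
// "/home/user/src/a.c", the shared prefix "/home/user" becomes the DIFile
// directory and "src/a.c" the file component.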
std::string CGDebugInfo::remapDIPath(StringRef Path) const {
for (const auto &Entry : DebugPrefixMap)
if (Path.startswith(Entry.first))
return (Twine(Entry.second) + Path.substr(Entry.first.size())).str();
return Path.str();
}
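// For example, under -fdebug-prefix-map=/usr/src=., the path
// "/usr/src/lib/foo.c" is rewritten to "./lib/foo.c".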
unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) {
if (Loc.isInvalid() && CurLoc.isInvalid())
return 0;
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc.isValid() ? Loc : CurLoc);
return PLoc.isValid() ? PLoc.getLine() : 0;
}
unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) {
// We may not want column information at all.
if (!Force && !CGM.getCodeGenOpts().DebugColumnInfo)
return 0;
// If the location is invalid then use the current column.
if (Loc.isInvalid() && CurLoc.isInvalid())
return 0;
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc.isValid() ? Loc : CurLoc);
return PLoc.isValid() ? PLoc.getColumn() : 0;
}
StringRef CGDebugInfo::getCurrentDirname() {
if (!CGM.getCodeGenOpts().DebugCompilationDir.empty())
return CGM.getCodeGenOpts().DebugCompilationDir;
if (!CWDName.empty())
return CWDName;
SmallString<256> CWD;
llvm::sys::fs::current_path(CWD);
return CWDName = internString(CWD);
}
void CGDebugInfo::CreateCompileUnit() {
SmallString<32> Checksum;
Optional<llvm::DIFile::ChecksumKind> CSKind;
Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
// Should we be asking the SourceManager for the main file name, instead of
// accepting it as an argument? This just causes the main file name to
// mismatch with source locations and create extra lexical scopes or
// mismatched debug info (a CU with a DW_AT_file of "-", because that's what
// the driver passed, but functions/other things have DW_AT_file of "<stdin>"
// because that's what the SourceManager says)
// Get absolute path name.
SourceManager &SM = CGM.getContext().getSourceManager();
std::string MainFileName = CGM.getCodeGenOpts().MainFileName;
if (MainFileName.empty())
MainFileName = "<stdin>";
// The main file name provided via the "-main-file-name" option contains just
// the file name itself with no path information. This file name may have had
// a relative path, so we look into the actual file entry for the main
// file to determine the real absolute path for the file.
std::string MainFileDir;
if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
MainFileDir = remapDIPath(MainFile->getDir()->getName());
if (MainFileDir != ".") {
llvm::SmallString<1024> MainFileDirSS(MainFileDir);
llvm::sys::path::append(MainFileDirSS, MainFileName);
MainFileName = MainFileDirSS.str();
}
// If the main file name provided is identical to the input file name, and
// if the input file is a preprocessed source, use the module name for
// debug info. The module name comes from the name specified in the first
// linemarker if the input is a preprocessed source.
if (MainFile->getName() == MainFileName &&
FrontendOptions::getInputKindForExtension(
MainFile->getName().rsplit('.').second)
.isPreprocessed())
MainFileName = CGM.getModule().getName().str();
CSKind = computeChecksum(SM.getMainFileID(), Checksum);
}
llvm::dwarf::SourceLanguage LangTag;
const LangOptions &LO = CGM.getLangOpts();
if (LO.CPlusPlus) {
if (LO.ObjC)
LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus;
else
LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
} else if (LO.ObjC) {
LangTag = llvm::dwarf::DW_LANG_ObjC;
} else if (LO.RenderScript) {
LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript;
} else if (LO.C99) {
LangTag = llvm::dwarf::DW_LANG_C99;
} else {
LangTag = llvm::dwarf::DW_LANG_C89;
}
std::string Producer = getClangFullVersion();
// Figure out which version of the ObjC runtime we have.
unsigned RuntimeVers = 0;
if (LO.ObjC)
RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1;
llvm::DICompileUnit::DebugEmissionKind EmissionKind;
switch (DebugKind) {
case codegenoptions::NoDebugInfo:
case codegenoptions::LocTrackingOnly:
EmissionKind = llvm::DICompileUnit::NoDebug;
break;
case codegenoptions::DebugLineTablesOnly:
EmissionKind = llvm::DICompileUnit::LineTablesOnly;
break;
case codegenoptions::DebugDirectivesOnly:
EmissionKind = llvm::DICompileUnit::DebugDirectivesOnly;
break;
case codegenoptions::LimitedDebugInfo:
case codegenoptions::FullDebugInfo:
EmissionKind = llvm::DICompileUnit::FullDebug;
break;
}
uint64_t DwoId = 0;
auto &CGOpts = CGM.getCodeGenOpts();
// The DIFile used by the CU is distinct from the main source
// file. Its directory part specifies what becomes the
// DW_AT_comp_dir (the compilation directory), even if the source
// file was specified with an absolute path.
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
llvm::DIFile *CUFile = DBuilder.createFile(
remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSInfo,
getSource(SM, SM.getMainFileID()));
// Create new compile unit.
TheCU = DBuilder.createCompileUnit(
LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "",
LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO,
CGOpts.DwarfDebugFlags, RuntimeVers,
(CGOpts.getSplitDwarfMode() != CodeGenOptions::NoFission)
? ""
: CGOpts.SplitDwarfFile,
EmissionKind, DwoId, CGOpts.SplitDwarfInlining,
CGOpts.DebugInfoForProfiling,
CGM.getTarget().getTriple().isNVPTX()
? llvm::DICompileUnit::DebugNameTableKind::None
: static_cast<llvm::DICompileUnit::DebugNameTableKind>(
CGOpts.DebugNameTable),
CGOpts.DebugRangesBaseAddress);
}
llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
llvm::dwarf::TypeKind Encoding;
StringRef BTName;
switch (BT->getKind()) {
#define BUILTIN_TYPE(Id, SingletonId)
#define PLACEHOLDER_TYPE(Id, SingletonId) case BuiltinType::Id:
#include "clang/AST/BuiltinTypes.def"
case BuiltinType::Dependent:
llvm_unreachable("Unexpected builtin type");
case BuiltinType::NullPtr:
return DBuilder.createNullPtrType();
case BuiltinType::Void:
return nullptr;
case BuiltinType::ObjCClass:
if (!ClassTy)
ClassTy =
DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
"objc_class", TheCU, TheCU->getFile(), 0);
return ClassTy;
case BuiltinType::ObjCId: {
// typedef struct objc_class *Class;
// typedef struct objc_object {
// Class isa;
// } *id;
if (ObjTy)
return ObjTy;
if (!ClassTy)
ClassTy =
DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
"objc_class", TheCU, TheCU->getFile(), 0);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
auto *ISATy = DBuilder.createPointerType(ClassTy, Size);
ObjTy = DBuilder.createStructType(TheCU, "objc_object", TheCU->getFile(), 0,
0, 0, llvm::DINode::FlagZero, nullptr,
llvm::DINodeArray());
DBuilder.replaceArrays(
ObjTy, DBuilder.getOrCreateArray(&*DBuilder.createMemberType(
ObjTy, "isa", TheCU->getFile(), 0, Size, 0, 0,
llvm::DINode::FlagZero, ISATy)));
return ObjTy;
}
case BuiltinType::ObjCSel: {
if (!SelTy)
SelTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
"objc_selector", TheCU,
TheCU->getFile(), 0);
return SelTy;
}
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \
SingletonId);
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
return getOrCreateStructPtrType("opencl_sampler_t", OCLSamplerDITy);
case BuiltinType::OCLEvent:
return getOrCreateStructPtrType("opencl_event_t", OCLEventDITy);
case BuiltinType::OCLClkEvent:
return getOrCreateStructPtrType("opencl_clk_event_t", OCLClkEventDITy);
case BuiltinType::OCLQueue:
return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy);
case BuiltinType::OCLReserveID:
return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy);
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id: \
return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty);
#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::UChar:
case BuiltinType::Char_U:
Encoding = llvm::dwarf::DW_ATE_unsigned_char;
break;
case BuiltinType::Char_S:
case BuiltinType::SChar:
Encoding = llvm::dwarf::DW_ATE_signed_char;
break;
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
Encoding = llvm::dwarf::DW_ATE_UTF;
break;
case BuiltinType::UShort:
case BuiltinType::UInt:
case BuiltinType::UInt128:
case BuiltinType::ULong:
case BuiltinType::WChar_U:
case BuiltinType::ULongLong:
Encoding = llvm::dwarf::DW_ATE_unsigned;
break;
case BuiltinType::Short:
case BuiltinType::Int:
case BuiltinType::Int128:
case BuiltinType::Long:
case BuiltinType::WChar_S:
case BuiltinType::LongLong:
Encoding = llvm::dwarf::DW_ATE_signed;
break;
case BuiltinType::Bool:
Encoding = llvm::dwarf::DW_ATE_boolean;
break;
case BuiltinType::Half:
case BuiltinType::Float:
case BuiltinType::LongDouble:
case BuiltinType::Float16:
case BuiltinType::Float128:
case BuiltinType::Double:
// FIXME: For targets where long double and __float128 have the same size,
// they are currently indistinguishable in the debugger without some
// special treatment. However, there is currently no consensus on encoding
// and this should be updated once a DWARF encoding exists for distinct
// floating point types of the same size.
Encoding = llvm::dwarf::DW_ATE_float;
break;
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
case BuiltinType::LongAccum:
case BuiltinType::ShortFract:
case BuiltinType::Fract:
case BuiltinType::LongFract:
case BuiltinType::SatShortFract:
case BuiltinType::SatFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatShortAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatLongAccum:
Encoding = llvm::dwarf::DW_ATE_signed_fixed;
break;
case BuiltinType::UShortAccum:
case BuiltinType::UAccum:
case BuiltinType::ULongAccum:
case BuiltinType::UShortFract:
case BuiltinType::UFract:
case BuiltinType::ULongFract:
case BuiltinType::SatUShortAccum:
case BuiltinType::SatUAccum:
case BuiltinType::SatULongAccum:
case BuiltinType::SatUShortFract:
case BuiltinType::SatUFract:
case BuiltinType::SatULongFract:
Encoding = llvm::dwarf::DW_ATE_unsigned_fixed;
break;
}
switch (BT->getKind()) {
case BuiltinType::Long:
BTName = "long int";
break;
case BuiltinType::LongLong:
BTName = "long long int";
break;
case BuiltinType::ULong:
BTName = "long unsigned int";
break;
case BuiltinType::ULongLong:
BTName = "long long unsigned int";
break;
default:
BTName = BT->getName(CGM.getLangOpts());
break;
}
// Bit size and offset of the type.
uint64_t Size = CGM.getContext().getTypeSize(BT);
return DBuilder.createBasicType(BTName, Size, Encoding);
}
llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) {
// Bit size and offset of the type.
llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float;
if (Ty->isComplexIntegerType())
Encoding = llvm::dwarf::DW_ATE_lo_user;
uint64_t Size = CGM.getContext().getTypeSize(Ty);
return DBuilder.createBasicType("complex", Size, Encoding);
}
llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty,
llvm::DIFile *Unit) {
QualifierCollector Qc;
const Type *T = Qc.strip(Ty);
// Ignore these qualifiers for now.
Qc.removeObjCGCAttr();
Qc.removeAddressSpace();
Qc.removeObjCLifetime();
// We will create one Derived type for one qualifier and recurse to handle any
// additional ones.
llvm::dwarf::Tag Tag;
if (Qc.hasConst()) {
Tag = llvm::dwarf::DW_TAG_const_type;
Qc.removeConst();
} else if (Qc.hasVolatile()) {
Tag = llvm::dwarf::DW_TAG_volatile_type;
Qc.removeVolatile();
} else if (Qc.hasRestrict()) {
Tag = llvm::dwarf::DW_TAG_restrict_type;
Qc.removeRestrict();
} else {
assert(Qc.empty() && "Unknown type qualifier for debug info");
return getOrCreateType(QualType(T, 0), Unit);
}
auto *FromTy = getOrCreateType(Qc.apply(CGM.getContext(), T), Unit);
// No need to fill in the Name, Line, Size, Alignment, Offset in case of
// CVR derived types.
return DBuilder.createQualifiedType(Tag, FromTy);
}
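// For example, "const volatile int" peels const first, producing a
// DW_TAG_const_type that wraps a DW_TAG_volatile_type around the plain "int"
// type via the recursive getOrCreateType call above.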
llvm::DIType *CGDebugInfo::CreateType(const ObjCObjectPointerType *Ty,
llvm::DIFile *Unit) {
// The frontend treats 'id' as a typedef to an ObjCObjectType,
// whereas 'id<protocol>' is treated as an ObjCPointerType. For the
// debug info, we want to emit 'id' in both cases.
if (Ty->isObjCQualifiedIdType())
return getOrCreateType(CGM.getContext().getObjCIdType(), Unit);
return CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type, Ty,
Ty->getPointeeType(), Unit);
}
llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty,
llvm::DIFile *Unit) {
return CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type, Ty,
Ty->getPointeeType(), Unit);
}
/// \return whether a C++ mangling exists for the type defined by TD.
static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
switch (TheCU->getSourceLanguage()) {
case llvm::dwarf::DW_LANG_C_plus_plus:
return true;
case llvm::dwarf::DW_LANG_ObjC_plus_plus:
return isa<CXXRecordDecl>(TD) || isa<EnumDecl>(TD);
default:
return false;
}
}
// Determines if the debug info for this tag declaration needs a type
// identifier. The purpose of the unique identifier is to deduplicate type
// information for identical types across TUs. Because of the C++ one definition
// rule (ODR), it is valid to assume that the type is defined the same way in
// every TU and its debug info is equivalent.
//
// C does not have the ODR, and it is common for codebases to contain multiple
// different definitions of a struct with the same name in different TUs.
// Therefore, if the type doesn't have a C++ mangling, don't give it an
// identifier. Type information in C is smaller and simpler than C++ type
// information, so the increase in debug info size is negligible.
//
// If the type is not externally visible, it should be unique to the current TU,
// and should not need an identifier to participate in type deduplication.
// However, when emitting CodeView, the format internally uses these
// unique type name identifiers for references between debug info. For example,
// the method of a class in an anonymous namespace uses the identifier to refer
// to its parent class. The Microsoft C++ ABI attempts to provide unique names
// for such types, so when emitting CodeView, always use identifiers for C++
// types. This may create problems when attempting to emit CodeView when the MS
// C++ ABI is not in use.
static bool needsTypeIdentifier(const TagDecl *TD, CodeGenModule &CGM,
llvm::DICompileUnit *TheCU) {
// We only add a type identifier for types with C++ name mangling.
if (!hasCXXMangling(TD, TheCU))
return false;
// Externally visible types with C++ mangling need a type identifier.
if (TD->isExternallyVisible())
return true;
// CodeView types with C++ mangling need a type identifier.
if (CGM.getCodeGenOpts().EmitCodeView)
return true;
return false;
}
// Returns a unique type identifier string if one exists, or an empty string.
static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM,
llvm::DICompileUnit *TheCU) {
SmallString<256> Identifier;
const TagDecl *TD = Ty->getDecl();
if (!needsTypeIdentifier(TD, CGM, TheCU))
return Identifier;
// TODO: This is using the RTTI name. Is there a better way to get
// a unique string for a type?
llvm::raw_svector_ostream Out(Identifier);
CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(QualType(Ty, 0), Out);
return Identifier;
}
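// For example, under the Microsoft C++ ABI an externally visible "struct S"
// gets its RTTI name ".?AUS@@" as the identifier; the Itanium mangler emits
// its own RTTI-name spelling (e.g. "_ZTS1S") instead.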
/// \return the appropriate DWARF tag for a composite type.
static llvm::dwarf::Tag getTagForRecord(const RecordDecl *RD) {
llvm::dwarf::Tag Tag;
if (RD->isStruct() || RD->isInterface())
Tag = llvm::dwarf::DW_TAG_structure_type;
else if (RD->isUnion())
Tag = llvm::dwarf::DW_TAG_union_type;
else {
// FIXME: This could be a struct type giving a default visibility different
// than C++ class type, but needs llvm metadata changes first.
assert(RD->isClass());
Tag = llvm::dwarf::DW_TAG_class_type;
}
return Tag;
}
llvm::DICompositeType *
CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
llvm::DIScope *Ctx) {
const RecordDecl *RD = Ty->getDecl();
if (llvm::DIType *T = getTypeOrNull(CGM.getContext().getRecordType(RD)))
return cast<llvm::DICompositeType>(T);
llvm::DIFile *DefUnit = getOrCreateFile(RD->getLocation());
unsigned Line = getLineNumber(RD->getLocation());
StringRef RDName = getClassName(RD);
uint64_t Size = 0;
uint32_t Align = 0;
// Create the type.
SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType(
getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align,
llvm::DINode::FlagFwdDecl, Identifier);
if (CGM.getCodeGenOpts().DebugFwdTemplateParams)
if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD))
DBuilder.replaceArrays(RetTy, llvm::DINodeArray(),
CollectCXXTemplateParams(TSpecial, DefUnit));
ReplaceMap.emplace_back(
std::piecewise_construct, std::make_tuple(Ty),
std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
return RetTy;
}
llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
const Type *Ty,
QualType PointeeTy,
llvm::DIFile *Unit) {
// Bit size, align and offset of the type.
// Size is always the size of a pointer. We can't use getTypeSize here
// because that does not return the correct value for references.
unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(PointeeTy);
uint64_t Size = CGM.getTarget().getPointerWidth(AddressSpace);
auto Align = getTypeAlignIfRequired(Ty, CGM.getContext());
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(AddressSpace);
if (Tag == llvm::dwarf::DW_TAG_reference_type ||
Tag == llvm::dwarf::DW_TAG_rvalue_reference_type)
return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit),
Size, Align, DWARFAddressSpace);
else
return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size,
Align, DWARFAddressSpace);
}
llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
llvm::DIType *&Cache) {
if (Cache)
return Cache;
Cache = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, Name,
TheCU, TheCU->getFile(), 0);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
Cache = DBuilder.createPointerType(Cache, Size);
return Cache;
}
uint64_t CGDebugInfo::collectDefaultElementTypesForBlockPointer(
const BlockPointerType *Ty, llvm::DIFile *Unit, llvm::DIDerivedType *DescTy,
unsigned LineNo, SmallVectorImpl<llvm::Metadata *> &EltTys) {
QualType FType;
// Advanced by calls to CreateMemberType in increments of FType, then
// returned as the overall size of the default elements.
uint64_t FieldOffset = 0;
// Blocks in OpenCL have unique constraints which make the standard fields
// redundant while requiring size and align fields for enqueue_kernel. See
// initializeForBlockHeader in CGBlocks.cpp
if (CGM.getLangOpts().OpenCL) {
FType = CGM.getContext().IntTy;
EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset));
EltTys.push_back(CreateMemberType(Unit, FType, "__align", &FieldOffset));
} else {
FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
EltTys.push_back(CreateMemberType(Unit, FType, "__isa", &FieldOffset));
FType = CGM.getContext().IntTy;
EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset));
EltTys.push_back(CreateMemberType(Unit, FType, "__reserved", &FieldOffset));
FType = CGM.getContext().getPointerType(Ty->getPointeeType());
EltTys.push_back(CreateMemberType(Unit, FType, "__FuncPtr", &FieldOffset));
FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
uint64_t FieldSize = CGM.getContext().getTypeSize(Ty);
uint32_t FieldAlign = CGM.getContext().getTypeAlign(Ty);
EltTys.push_back(DBuilder.createMemberType(
Unit, "__descriptor", nullptr, LineNo, FieldSize, FieldAlign,
FieldOffset, llvm::DINode::FlagZero, DescTy));
FieldOffset += FieldSize;
}
return FieldOffset;
}
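// For reference, the non-OpenCL header built above corresponds roughly to
// the following source-level layout (an illustrative sketch, not code this
// function emits):
//
//   struct __block_literal_generic {
//     void *__isa;
//     int __flags;
//     int __reserved;
//     void (*__FuncPtr)(void); // really a pointer to the block's function type
//     struct __block_descriptor *__descriptor;
//   };
//
// In OpenCL mode only the __size and __align members are described.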
llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
llvm::DIFile *Unit) {
SmallVector<llvm::Metadata *, 8> EltTys;
QualType FType;
uint64_t FieldOffset;
llvm::DINodeArray Elements;
FieldOffset = 0;
FType = CGM.getContext().UnsignedLongTy;
EltTys.push_back(CreateMemberType(Unit, FType, "reserved", &FieldOffset));
EltTys.push_back(CreateMemberType(Unit, FType, "Size", &FieldOffset));
Elements = DBuilder.getOrCreateArray(EltTys);
EltTys.clear();
llvm::DINode::DIFlags Flags = llvm::DINode::FlagAppleBlock;
auto *EltTy =
DBuilder.createStructType(Unit, "__block_descriptor", nullptr, 0,
FieldOffset, 0, Flags, nullptr, Elements);
// Bit size, align and offset of the type.
uint64_t Size = CGM.getContext().getTypeSize(Ty);
auto *DescTy = DBuilder.createPointerType(EltTy, Size);
FieldOffset = collectDefaultElementTypesForBlockPointer(Ty, Unit, DescTy,
0, EltTys);
Elements = DBuilder.getOrCreateArray(EltTys);
// The __block_literal_generic structs are marked with a special
// DW_AT_APPLE_BLOCK attribute and are an implementation detail only
// the debugger needs to know about. To allow type uniquing, emit
// them without a name or a location.
EltTy = DBuilder.createStructType(Unit, "", nullptr, 0, FieldOffset, 0,
Flags, nullptr, Elements);
return DBuilder.createPointerType(EltTy, Size);
}
llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
llvm::DIFile *Unit) {
assert(Ty->isTypeAlias());
llvm::DIType *Src = getOrCreateType(Ty->getAliasedType(), Unit);
SmallString<128> NS;
llvm::raw_svector_ostream OS(NS);
Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false);
printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy());
auto *AliasDecl =
cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl())
->getTemplatedDecl();
SourceLocation Loc = AliasDecl->getLocation();
return DBuilder.createTypedef(Src, OS.str(), getOrCreateFile(Loc),
getLineNumber(Loc),
getDeclContextDescriptor(AliasDecl));
}
llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty,
llvm::DIFile *Unit) {
// We don't set size information, but do specify where the typedef was
// declared.
SourceLocation Loc = Ty->getDecl()->getLocation();
// Typedefs are derived from some other type.
return DBuilder.createTypedef(
getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit),
Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc),
getDeclContextDescriptor(Ty->getDecl()));
}
static unsigned getDwarfCC(CallingConv CC) {
switch (CC) {
case CC_C:
// Avoid emitting DW_AT_calling_convention if the C convention was used.
return 0;
case CC_X86StdCall:
return llvm::dwarf::DW_CC_BORLAND_stdcall;
case CC_X86FastCall:
return llvm::dwarf::DW_CC_BORLAND_msfastcall;
case CC_X86ThisCall:
return llvm::dwarf::DW_CC_BORLAND_thiscall;
case CC_X86VectorCall:
return llvm::dwarf::DW_CC_LLVM_vectorcall;
case CC_X86Pascal:
return llvm::dwarf::DW_CC_BORLAND_pascal;
case CC_Win64:
return llvm::dwarf::DW_CC_LLVM_Win64;
case CC_X86_64SysV:
return llvm::dwarf::DW_CC_LLVM_X86_64SysV;
case CC_AAPCS:
case CC_AArch64VectorCall:
return llvm::dwarf::DW_CC_LLVM_AAPCS;
case CC_AAPCS_VFP:
return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP;
case CC_IntelOclBicc:
return llvm::dwarf::DW_CC_LLVM_IntelOclBicc;
case CC_SpirFunction:
return llvm::dwarf::DW_CC_LLVM_SpirFunction;
case CC_OpenCLKernel:
return llvm::dwarf::DW_CC_LLVM_OpenCLKernel;
case CC_Swift:
return llvm::dwarf::DW_CC_LLVM_Swift;
case CC_PreserveMost:
return llvm::dwarf::DW_CC_LLVM_PreserveMost;
case CC_PreserveAll:
return llvm::dwarf::DW_CC_LLVM_PreserveAll;
case CC_X86RegCall:
return llvm::dwarf::DW_CC_LLVM_X86RegCall;
}
return 0;
}
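// For example (illustrative): a function using the x86 stdcall convention is
// described with DW_CC_BORLAND_stdcall, while a plain C-convention function
// returns 0 above so that no DW_AT_calling_convention attribute is emitted
// at all.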
llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty,
llvm::DIFile *Unit) {
SmallVector<llvm::Metadata *, 16> EltTys;
// Add the result type at least.
EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit));
// Set up the remainder of the arguments if there is a prototype;
// otherwise emit it as a variadic function.
if (isa<FunctionNoProtoType>(Ty))
EltTys.push_back(DBuilder.createUnspecifiedParameter());
else if (const auto *FPT = dyn_cast<FunctionProtoType>(Ty)) {
for (const QualType &ParamType : FPT->param_types())
EltTys.push_back(getOrCreateType(ParamType, Unit));
if (FPT->isVariadic())
EltTys.push_back(DBuilder.createUnspecifiedParameter());
}
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
return DBuilder.createSubroutineType(EltTypeArray, llvm::DINode::FlagZero,
getDwarfCC(Ty->getCallConv()));
}
/// Convert an AccessSpecifier into the corresponding DINode flag.
/// As an optimization, return 0 if the access specifier equals the
/// default for the containing type.
static llvm::DINode::DIFlags getAccessFlag(AccessSpecifier Access,
const RecordDecl *RD) {
AccessSpecifier Default = clang::AS_none;
if (RD && RD->isClass())
Default = clang::AS_private;
else if (RD && (RD->isStruct() || RD->isUnion()))
Default = clang::AS_public;
if (Access == Default)
return llvm::DINode::FlagZero;
switch (Access) {
case clang::AS_private:
return llvm::DINode::FlagPrivate;
case clang::AS_protected:
return llvm::DINode::FlagProtected;
case clang::AS_public:
return llvm::DINode::FlagPublic;
case clang::AS_none:
return llvm::DINode::FlagZero;
}
llvm_unreachable("unexpected access enumerator");
}
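// For example (illustrative): in `class C { int x; };` the implicit private
// access of `x` matches the class default, so FlagZero is returned and no
// access flag is attached; in `struct S { private: int y; };` the
// non-default private access yields FlagPrivate.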
llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl,
llvm::DIScope *RecordTy,
const RecordDecl *RD) {
StringRef Name = BitFieldDecl->getName();
QualType Ty = BitFieldDecl->getType();
SourceLocation Loc = BitFieldDecl->getLocation();
llvm::DIFile *VUnit = getOrCreateFile(Loc);
llvm::DIType *DebugType = getOrCreateType(Ty, VUnit);
// Get the location for the field.
llvm::DIFile *File = getOrCreateFile(Loc);
unsigned Line = getLineNumber(Loc);
const CGBitFieldInfo &BitFieldInfo =
CGM.getTypes().getCGRecordLayout(RD).getBitFieldInfo(BitFieldDecl);
uint64_t SizeInBits = BitFieldInfo.Size;
assert(SizeInBits > 0 && "found named 0-width bitfield");
uint64_t StorageOffsetInBits =
CGM.getContext().toBits(BitFieldInfo.StorageOffset);
uint64_t Offset = BitFieldInfo.Offset;
// The bit offsets are reversed on big-endian targets; compensate for
// that, as DIDerivedType requires un-reversed offsets.
if (CGM.getDataLayout().isBigEndian())
Offset = BitFieldInfo.StorageSize - BitFieldInfo.Size - Offset;
uint64_t OffsetInBits = StorageOffsetInBits + Offset;
llvm::DINode::DIFlags Flags = getAccessFlag(BitFieldDecl->getAccess(), RD);
return DBuilder.createBitFieldMemberType(
RecordTy, Name, File, Line, SizeInBits, OffsetInBits, StorageOffsetInBits,
Flags, DebugType);
}
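// Worked example of the big-endian compensation above (an illustrative
// sketch; the exact layout depends on the target ABI): given
//
//   struct S { unsigned a : 3; unsigned b : 5; };
//
// with a 32-bit storage unit, a little-endian target records Offset == 3
// for `b`, while a big-endian target records Offset == 32 - (3 + 5) == 24;
// the reversal StorageSize - Size - Offset == 32 - 5 - 24 recovers the
// un-reversed offset 3 that DIDerivedType expects.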
llvm::DIType *
CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc,
AccessSpecifier AS, uint64_t offsetInBits,
uint32_t AlignInBits, llvm::DIFile *tunit,
llvm::DIScope *scope, const RecordDecl *RD) {
llvm::DIType *debugType = getOrCreateType(type, tunit);
// Get the location for the field.
llvm::DIFile *file = getOrCreateFile(loc);
unsigned line = getLineNumber(loc);
uint64_t SizeInBits = 0;
auto Align = AlignInBits;
if (!type->isIncompleteArrayType()) {
TypeInfo TI = CGM.getContext().getTypeInfo(type);
SizeInBits = TI.Width;
if (!Align)
Align = getTypeAlignIfRequired(type, CGM.getContext());
}
llvm::DINode::DIFlags flags = getAccessFlag(AS, RD);
return DBuilder.createMemberType(scope, name, file, line, SizeInBits, Align,
offsetInBits, flags, debugType);
}
void CGDebugInfo::CollectRecordLambdaFields(
const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements,
llvm::DIType *RecordTy) {
// For C++11 lambdas a Field will be the same as a Capture, but the Capture
// has the name and the location of the variable, so we iterate over both
// concurrently.
const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(CXXDecl);
RecordDecl::field_iterator Field = CXXDecl->field_begin();
unsigned fieldno = 0;
for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(),
E = CXXDecl->captures_end();
I != E; ++I, ++Field, ++fieldno) {
const LambdaCapture &C = *I;
if (C.capturesVariable()) {
SourceLocation Loc = C.getLocation();
assert(!Field->isBitField() && "lambdas don't have bitfield members!");
VarDecl *V = C.getCapturedVar();
StringRef VName = V->getName();
llvm::DIFile *VUnit = getOrCreateFile(Loc);
auto Align = getDeclAlignIfRequired(V, CGM.getContext());
llvm::DIType *FieldType = createFieldType(
VName, Field->getType(), Loc, Field->getAccess(),
layout.getFieldOffset(fieldno), Align, VUnit, RecordTy, CXXDecl);
elements.push_back(FieldType);
} else if (C.capturesThis()) {
// TODO: 'this' needs handling, probably by renaming the 'this' member of
// the lambda class and adding a field member for it, or by using
// DW_AT_object_pointer for the function and having that be used as 'this'
// for semantic references.
FieldDecl *f = *Field;
llvm::DIFile *VUnit = getOrCreateFile(f->getLocation());
QualType type = f->getType();
llvm::DIType *fieldType = createFieldType(
"this", type, f->getLocation(), f->getAccess(),
layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl);
elements.push_back(fieldType);
}
}
}
llvm::DIDerivedType *
CGDebugInfo::CreateRecordStaticField(const VarDecl *Var, llvm::DIType *RecordTy,
const RecordDecl *RD) {
// Create the descriptor for the static variable, with or without
// constant initializers.
Var = Var->getCanonicalDecl();
llvm::DIFile *VUnit = getOrCreateFile(Var->getLocation());
llvm::DIType *VTy = getOrCreateType(Var->getType(), VUnit);
unsigned LineNumber = getLineNumber(Var->getLocation());
StringRef VName = Var->getName();
llvm::Constant *C = nullptr;
if (Var->getInit()) {
const APValue *Value = Var->evaluateValue();
if (Value) {
if (Value->isInt())
C = llvm::ConstantInt::get(CGM.getLLVMContext(), Value->getInt());
if (Value->isFloat())
C = llvm::ConstantFP::get(CGM.getLLVMContext(), Value->getFloat());
}
}
llvm::DINode::DIFlags Flags = getAccessFlag(Var->getAccess(), RD);
auto Align = getDeclAlignIfRequired(Var, CGM.getContext());
llvm::DIDerivedType *GV = DBuilder.createStaticMemberType(
RecordTy, VName, VUnit, LineNumber, VTy, Flags, C, Align);
StaticDataMemberCache[Var->getCanonicalDecl()].reset(GV);
return GV;
}
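// For example (illustrative): given
//
//   struct S { static constexpr int K = 42; };
//
// the initializer evaluates to an integer APValue, so the member is emitted
// with a ConstantInt of 42 attached, letting a debugger display the value
// even when no definition of S::K is emitted.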
void CGDebugInfo::CollectRecordNormalField(
const FieldDecl *field, uint64_t OffsetInBits, llvm::DIFile *tunit,
SmallVectorImpl<llvm::Metadata *> &elements, llvm::DIType *RecordTy,
const RecordDecl *RD) {
StringRef name = field->getName();
QualType type = field->getType();
// Ignore unnamed fields unless they're anonymous structs/unions.
if (name.empty() && !type->isRecordType())
return;
llvm::DIType *FieldType;
if (field->isBitField()) {
FieldType = createBitFieldType(field, RecordTy, RD);
} else {
auto Align = getDeclAlignIfRequired(field, CGM.getContext());
FieldType =
createFieldType(name, type, field->getLocation(), field->getAccess(),
OffsetInBits, Align, tunit, RecordTy, RD);
}
elements.push_back(FieldType);
}
void CGDebugInfo::CollectRecordNestedType(
const TypeDecl *TD, SmallVectorImpl<llvm::Metadata *> &elements) {
QualType Ty = CGM.getContext().getTypeDeclType(TD);
// Injected class names are not considered nested records.
if (isa<InjectedClassNameType>(Ty))
return;
SourceLocation Loc = TD->getLocation();
llvm::DIType *nestedType = getOrCreateType(Ty, getOrCreateFile(Loc));
elements.push_back(nestedType);
}
void CGDebugInfo::CollectRecordFields(
const RecordDecl *record, llvm::DIFile *tunit,
SmallVectorImpl<llvm::Metadata *> &elements,
llvm::DICompositeType *RecordTy) {
const auto *CXXDecl = dyn_cast<CXXRecordDecl>(record);
if (CXXDecl && CXXDecl->isLambda())
CollectRecordLambdaFields(CXXDecl, elements, RecordTy);
else {
const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record);
// Field number for non-static fields.
unsigned fieldNo = 0;
// Static and non-static members should appear in the same order as
// the corresponding declarations in the source program.
for (const auto *I : record->decls())
if (const auto *V = dyn_cast<VarDecl>(I)) {
if (V->hasAttr<NoDebugAttr>())
continue;
// Skip variable template specializations when emitting CodeView. MSVC
// doesn't emit them.
if (CGM.getCodeGenOpts().EmitCodeView &&
isa<VarTemplateSpecializationDecl>(V))
continue;
// Reuse the existing static member declaration if one exists
auto MI = StaticDataMemberCache.find(V->getCanonicalDecl());
if (MI != StaticDataMemberCache.end()) {
assert(MI->second &&
"Static data member declaration should still exist");
elements.push_back(MI->second);
} else {
auto Field = CreateRecordStaticField(V, RecordTy, record);
elements.push_back(Field);
}
} else if (const auto *field = dyn_cast<FieldDecl>(I)) {
CollectRecordNormalField(field, layout.getFieldOffset(fieldNo), tunit,
elements, RecordTy, record);
// Bump field number for next field.
++fieldNo;
} else if (CGM.getCodeGenOpts().EmitCodeView) {
// Debug info for nested types is included in the member list only for
// CodeView.
if (const auto *nestedType = dyn_cast<TypeDecl>(I))
if (!nestedType->isImplicit() &&
nestedType->getDeclContext() == record)
CollectRecordNestedType(nestedType, elements);
}
}
}
llvm::DISubroutineType *
CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method,
llvm::DIFile *Unit) {
const FunctionProtoType *Func = Method->getType()->getAs<FunctionProtoType>();
if (Method->isStatic())
return cast_or_null<llvm::DISubroutineType>(
getOrCreateType(QualType(Func, 0), Unit));
return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit);
}
llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile *Unit) {
// Add "this" pointer.
llvm::DITypeRefArray Args(
cast<llvm::DISubroutineType>(getOrCreateType(QualType(Func, 0), Unit))
->getTypeArray());
assert(Args.size() && "Invalid number of arguments!");
SmallVector<llvm::Metadata *, 16> Elts;
// First element is always return type. For 'void' functions it is NULL.
Elts.push_back(Args[0]);
// "this" pointer is always first argument.
const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl();
if (isa<ClassTemplateSpecializationDecl>(RD)) {
// Create pointer type directly in this case.
const PointerType *ThisPtrTy = cast<PointerType>(ThisPtr);
QualType PointeeTy = ThisPtrTy->getPointeeType();
unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy);
uint64_t Size = CGM.getTarget().getPointerWidth(AS);
auto Align = getTypeAlignIfRequired(ThisPtrTy, CGM.getContext());
llvm::DIType *PointeeType = getOrCreateType(PointeeTy, Unit);
llvm::DIType *ThisPtrType =
DBuilder.createPointerType(PointeeType, Size, Align);
TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType);
// TODO: This and the artificial type below are misleading; the types
// aren't artificial, the argument is, but the current metadata doesn't
// represent that.
ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType);
Elts.push_back(ThisPtrType);
} else {
llvm::DIType *ThisPtrType = getOrCreateType(ThisPtr, Unit);
TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType);
ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType);
Elts.push_back(ThisPtrType);
}
// Copy rest of the arguments.
for (unsigned i = 1, e = Args.size(); i != e; ++i)
Elts.push_back(Args[i]);
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
if (Func->getExtProtoInfo().RefQualifier == RQ_LValue)
Flags |= llvm::DINode::FlagLValueReference;
if (Func->getExtProtoInfo().RefQualifier == RQ_RValue)
Flags |= llvm::DINode::FlagRValueReference;
return DBuilder.createSubroutineType(EltTypeArray, Flags,
getDwarfCC(Func->getCallConv()));
}
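// For example (illustrative): for a method declared `void f() &&;` the
// ref-qualifier check above adds FlagRValueReference to the subroutine
// type, and the "this" pointer is inserted as the first parameter (after
// the return type) and marked via createObjectPointerType.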
/// isFunctionLocalClass - Return true if CXXRecordDecl is defined
/// inside a function.
static bool isFunctionLocalClass(const CXXRecordDecl *RD) {
if (const auto *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext()))
return isFunctionLocalClass(NRD);
if (isa<FunctionDecl>(RD->getDeclContext()))
return true;
return false;
}
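// For example (illustrative): in `void f() { struct Local { }; }` the class
// Local (and any class nested within it) is function-local, found by the
// recursion above.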
llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
const CXXMethodDecl *Method, llvm::DIFile *Unit, llvm::DIType *RecordTy) {
bool IsCtorOrDtor =
isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method);
StringRef MethodName = getFunctionName(Method);
llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit);
// Since a single ctor/dtor corresponds to multiple functions, it doesn't
// make sense to give a single ctor/dtor a linkage name.
StringRef MethodLinkageName;
// FIXME: 'isFunctionLocalClass' seems like an arbitrary/unintentional
// property to use here. It may have been intended to model "is non-external
// type" but misses cases of non-function-local but non-external classes,
// such as those in anonymous namespaces, as well as the reverse: external
// types that are function-local, such as those in (non-local) inline
// functions.
if (!IsCtorOrDtor && !isFunctionLocalClass(Method->getParent()))
MethodLinkageName = CGM.getMangledName(Method);
// Get the location for the method.
llvm::DIFile *MethodDefUnit = nullptr;
unsigned MethodLine = 0;
if (!Method->isImplicit()) {
MethodDefUnit = getOrCreateFile(Method->getLocation());
MethodLine = getLineNumber(Method->getLocation());
}
// Collect virtual method info.
llvm::DIType *ContainingType = nullptr;
unsigned VIndex = 0;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
int ThisAdjustment = 0;
if (Method->isVirtual()) {
if (Method->isPure())
SPFlags |= llvm::DISubprogram::SPFlagPureVirtual;
else
SPFlags |= llvm::DISubprogram::SPFlagVirtual;
if (CGM.getTarget().getCXXABI().isItaniumFamily()) {
// It doesn't make sense to give a virtual destructor a vtable index,
// since a single destructor has two entries in the vtable.
if (!isa<CXXDestructorDecl>(Method))
VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method);
} else {
// Emit MS ABI vftable information. There is only one entry for the
// deleting dtor.
const auto *DD = dyn_cast<CXXDestructorDecl>(Method);
GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method);
MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD);
VIndex = ML.Index;
// CodeView only records the vftable offset in the class that introduces
// the virtual method. This is possible because, unlike Itanium, the MS
// C++ ABI does not include all virtual methods from non-primary bases in
// the vtable for the most derived class. For example, if C inherits from
// A and B, C's primary vftable will not include B's virtual methods.
if (Method->size_overridden_methods() == 0)
Flags |= llvm::DINode::FlagIntroducedVirtual;
// The 'this' adjustment accounts for both the virtual and non-virtual
// portions of the adjustment. Presumably the debugger only uses it when
// it knows the dynamic type of an object.
ThisAdjustment = CGM.getCXXABI()
.getVirtualFunctionPrologueThisAdjustment(GD)
.getQuantity();
}
ContainingType = RecordTy;
}
if (Method->isStatic())
Flags |= llvm::DINode::FlagStaticMember;
if (Method->isImplicit())
Flags |= llvm::DINode::FlagArtificial;
Flags |= getAccessFlag(Method->getAccess(), Method->getParent());
if (const auto *CXXC = dyn_cast<CXXConstructorDecl>(Method)) {
if (CXXC->isExplicit())
Flags |= llvm::DINode::FlagExplicit;
} else if (const auto *CXXC = dyn_cast<CXXConversionDecl>(Method)) {
if (CXXC->isExplicit())
Flags |= llvm::DINode::FlagExplicit;
}
if (Method->hasPrototype())
Flags |= llvm::DINode::FlagPrototyped;
if (Method->getRefQualifier() == RQ_LValue)
Flags |= llvm::DINode::FlagLValueReference;
if (Method->getRefQualifier() == RQ_RValue)
Flags |= llvm::DINode::FlagRValueReference;
if (CGM.getLangOpts().Optimize)
SPFlags |= llvm::DISubprogram::SPFlagOptimized;
llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
llvm::DISubprogram *SP = DBuilder.createMethod(
RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine,
MethodTy, VIndex, ThisAdjustment, ContainingType, Flags, SPFlags,
TParamsArray.get());
SPCache[Method->getCanonicalDecl()].reset(SP);
return SP;
}
void CGDebugInfo::CollectCXXMemberFunctions(
const CXXRecordDecl *RD, llvm::DIFile *Unit,
SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DIType *RecordTy) {
// Since we want more than just the individual member decls if we have
// templated functions, iterate over every declaration to gather the
// functions.
for (const auto *I : RD->decls()) {
const auto *Method = dyn_cast<CXXMethodDecl>(I);
// If the member is implicit, don't add it to the member list. This avoids
// the member being added to type units by LLVM, while still allowing it
// to be emitted into the type declaration/reference inside the compile
// unit.
// Ditto 'nodebug' methods, for consistency with CodeGenFunction.cpp.
// FIXME: Handle Using(Shadow?)Decls here to create
// DW_TAG_imported_declarations inside the class for base decls brought into
// derived classes. GDB doesn't seem to notice/leverage these when I tried
// it, so I'm not rushing to fix this. (GCC seems to produce them, if
// referenced)
if (!Method || Method->isImplicit() || Method->hasAttr<NoDebugAttr>())
continue;
if (Method->getType()->getAs<FunctionProtoType>()->getContainedAutoType())
continue;
// Reuse the existing member function declaration if it exists.
// It may be associated with the declaration of the type and should be
// reused as we're building the definition.
//
// This situation can arise in the vtable-based debug info reduction where
// implicit members are emitted in a non-vtable TU.
auto MI = SPCache.find(Method->getCanonicalDecl());
EltTys.push_back(MI == SPCache.end()
? CreateCXXMemberFunction(Method, Unit, RecordTy)
: static_cast<llvm::Metadata *>(MI->second));
}
}
void CGDebugInfo::CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile *Unit,
SmallVectorImpl<llvm::Metadata *> &EltTys,
llvm::DIType *RecordTy) {
llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> SeenTypes;
CollectCXXBasesAux(RD, Unit, EltTys, RecordTy, RD->bases(), SeenTypes,
llvm::DINode::FlagZero);
// If we are generating CodeView debug info, we also need to emit records for
// indirect virtual base classes.
if (CGM.getCodeGenOpts().EmitCodeView) {
CollectCXXBasesAux(RD, Unit, EltTys, RecordTy, RD->vbases(), SeenTypes,
llvm::DINode::FlagIndirectVirtualBase);
}
}
void CGDebugInfo::CollectCXXBasesAux(
const CXXRecordDecl *RD, llvm::DIFile *Unit,
SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DIType *RecordTy,
const CXXRecordDecl::base_class_const_range &Bases,
llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes,
llvm::DINode::DIFlags StartingFlags) {
const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
for (const auto &BI : Bases) {
const auto *Base =
cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl());
if (!SeenTypes.insert(Base).second)
continue;
auto *BaseTy = getOrCreateType(BI.getType(), Unit);
llvm::DINode::DIFlags BFlags = StartingFlags;
uint64_t BaseOffset;
uint32_t VBPtrOffset = 0;
if (BI.isVirtual()) {
if (CGM.getTarget().getCXXABI().isItaniumFamily()) {
// The virtual base offset offset is negative. The code generator emits a
// DWARF expression that expects a positive number.
BaseOffset = 0 - CGM.getItaniumVTableContext()
.getVirtualBaseOffsetOffset(RD, Base)
.getQuantity();
} else {
// In the MS ABI, store the vbtable offset, which is analogous to the
// vbase offset offset in Itanium.
BaseOffset =
4 * CGM.getMicrosoftVTableContext().getVBTableIndex(RD, Base);
VBPtrOffset = CGM.getContext()
.getASTRecordLayout(RD)
.getVBPtrOffset()
.getQuantity();
}
BFlags |= llvm::DINode::FlagVirtual;
} else
BaseOffset = CGM.getContext().toBits(RL.getBaseClassOffset(Base));
// FIXME: Inconsistent units for BaseOffset. It is in bytes when
// BI->isVirtual() and bits when not.
BFlags |= getAccessFlag(BI.getAccessSpecifier(), RD);
llvm::DIType *DTy = DBuilder.createInheritance(RecordTy, BaseTy, BaseOffset,
VBPtrOffset, BFlags);
EltTys.push_back(DTy);
}
}
llvm::DINodeArray
CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
ArrayRef<TemplateArgument> TAList,
llvm::DIFile *Unit) {
SmallVector<llvm::Metadata *, 16> TemplateParams;
for (unsigned i = 0, e = TAList.size(); i != e; ++i) {
const TemplateArgument &TA = TAList[i];
StringRef Name;
if (TPList)
Name = TPList->getParam(i)->getName();
switch (TA.getKind()) {
case TemplateArgument::Type: {
llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit);
TemplateParams.push_back(
DBuilder.createTemplateTypeParameter(TheCU, Name, TTy));
} break;
case TemplateArgument::Integral: {
llvm::DIType *TTy = getOrCreateType(TA.getIntegralType(), Unit);
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
TheCU, Name, TTy,
llvm::ConstantInt::get(CGM.getLLVMContext(), TA.getAsIntegral())));
} break;
case TemplateArgument::Declaration: {
const ValueDecl *D = TA.getAsDecl();
QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext());
llvm::DIType *TTy = getOrCreateType(T, Unit);
llvm::Constant *V = nullptr;
const CXXMethodDecl *MD;
// Variable pointer template parameters have a value that is the address
// of the variable.
if (const auto *VD = dyn_cast<VarDecl>(D))
V = CGM.GetAddrOfGlobalVar(VD);
// Member function pointers have special support for building their values,
// though this is currently unsupported in LLVM CodeGen.
else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance())
V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
else if (const auto *FD = dyn_cast<FunctionDecl>(D))
V = CGM.GetAddrOfFunction(FD);
// Member data pointers have special handling too to compute the fixed
// offset within the object.
else if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr())) {
// These five lines (and possibly the above member function pointer
// handling) could perhaps be refactored to share code with
// CodeGenModule::getMemberPointerConstant.
uint64_t fieldOffset = CGM.getContext().getFieldOffset(D);
CharUnits chars =
CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
}
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
TheCU, Name, TTy,
cast_or_null<llvm::Constant>(V->stripPointerCasts())));
} break;
case TemplateArgument::NullPtr: {
QualType T = TA.getNullPtrType();
llvm::DIType *TTy = getOrCreateType(T, Unit);
llvm::Constant *V = nullptr;
// Special case member data pointer null values since they're actually -1
// instead of zero.
if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr()))
// But treat member function pointers as simple zero integers because
// it's easier than having a special case in LLVM's CodeGen. If LLVM
// CodeGen grows handling for values of non-null member function
// pointers then perhaps we could remove this special case and rely on
// EmitNullMemberPointer for member function pointers.
if (MPT->isMemberDataPointer())
V = CGM.getCXXABI().EmitNullMemberPointer(MPT);
if (!V)
V = llvm::ConstantInt::get(CGM.Int8Ty, 0);
TemplateParams.push_back(
DBuilder.createTemplateValueParameter(TheCU, Name, TTy, V));
} break;
case TemplateArgument::Template:
TemplateParams.push_back(DBuilder.createTemplateTemplateParameter(
TheCU, Name, nullptr,
TA.getAsTemplate().getAsTemplateDecl()->getQualifiedNameAsString()));
break;
case TemplateArgument::Pack:
TemplateParams.push_back(DBuilder.createTemplateParameterPack(
TheCU, Name, nullptr,
CollectTemplateParams(nullptr, TA.getPackAsArray(), Unit)));
break;
case TemplateArgument::Expression: {
const Expr *E = TA.getAsExpr();
QualType T = E->getType();
if (E->isGLValue())
T = CGM.getContext().getLValueReferenceType(T);
llvm::Constant *V = ConstantEmitter(CGM).emitAbstract(E, T);
assert(V && "Expression in template argument isn't constant");
llvm::DIType *TTy = getOrCreateType(T, Unit);
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
TheCU, Name, TTy, V->stripPointerCasts()));
} break;
// And the following should never occur:
case TemplateArgument::TemplateExpansion:
case TemplateArgument::Null:
llvm_unreachable(
"These argument types shouldn't exist in concrete types");
}
}
return DBuilder.getOrCreateArray(TemplateParams);
}
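// For example (illustrative): instantiating
//
//   template <typename T, int N> struct A {};
//   A<int, 3> a;
//
// produces one template type parameter (T = int) via the Type case and one
// template value parameter (N = 3, a ConstantInt) via the Integral case.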
llvm::DINodeArray
CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD,
llvm::DIFile *Unit) {
if (FD->getTemplatedKind() ==
FunctionDecl::TK_FunctionTemplateSpecialization) {
const TemplateParameterList *TList = FD->getTemplateSpecializationInfo()
->getTemplate()
->getTemplateParameters();
return CollectTemplateParams(
TList, FD->getTemplateSpecializationArgs()->asArray(), Unit);
}
return llvm::DINodeArray();
}
llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL,
- llvm::DIFile *Unit) {
- if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) {
- auto T = TS->getSpecializedTemplateOrPartial();
- auto TA = TS->getTemplateArgs().asArray();
- // Collect parameters for a partial specialization
- if (T.is<VarTemplatePartialSpecializationDecl *>()) {
- const TemplateParameterList *TList =
- T.get<VarTemplatePartialSpecializationDecl *>()
- ->getTemplateParameters();
- return CollectTemplateParams(TList, TA, Unit);
- }
-
- // Collect parameters for an explicit specialization
- if (T.is<VarTemplateDecl *>()) {
- const TemplateParameterList *TList = T.get<VarTemplateDecl *>()
- ->getTemplateParameters();
- return CollectTemplateParams(TList, TA, Unit);
- }
- }
- return llvm::DINodeArray();
+ llvm::DIFile *Unit) {
+ // Always get the full list of parameters, not just the ones from the
+ // specialization. A partial specialization may have fewer parameters than
+ // there are arguments.
+ auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL);
+ if (!TS)
+ return llvm::DINodeArray();
+ VarTemplateDecl *T = TS->getSpecializedTemplate();
+ const TemplateParameterList *TList = T->getTemplateParameters();
+ auto TA = TS->getTemplateArgs().asArray();
+ return CollectTemplateParams(TList, TA, Unit);
}
llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams(
const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) {
- // Always get the full list of parameters, not just the ones from
- // the specialization.
+ // Always get the full list of parameters, not just the ones from the
+ // specialization. A partial specialization may have fewer parameters than
+ // there are arguments.
TemplateParameterList *TPList =
TSpecial->getSpecializedTemplate()->getTemplateParameters();
const TemplateArgumentList &TAList = TSpecial->getTemplateArgs();
return CollectTemplateParams(TPList, TAList.asArray(), Unit);
}
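// For example (illustrative of the comment above): with
//
//   template <typename T, typename U> struct P {};
//   template <typename T> struct P<T, int> {};
//
// the specialization P<char, int> carries two template arguments, but the
// partial specialization lists only one parameter, so the primary
// template's parameter list is used to name both arguments.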
llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) {
if (VTablePtrType)
return VTablePtrType;
ASTContext &Context = CGM.getContext();
/* Function type */
llvm::Metadata *STy = getOrCreateType(Context.IntTy, Unit);
llvm::DITypeRefArray SElements = DBuilder.getOrCreateTypeArray(STy);
llvm::DIType *SubTy = DBuilder.createSubroutineType(SElements);
unsigned Size = Context.getTypeSize(Context.VoidPtrTy);
unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace();
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
llvm::DIType *vtbl_ptr_type = DBuilder.createPointerType(
SubTy, Size, 0, DWARFAddressSpace, "__vtbl_ptr_type");
VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size);
return VTablePtrType;
}
StringRef CGDebugInfo::getVTableName(const CXXRecordDecl *RD) {
// Copy the gdb-compatible name on the side and use its reference.
return internString("_vptr$", RD->getNameAsString());
}
void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
SmallVectorImpl<llvm::Metadata *> &EltTys,
llvm::DICompositeType *RecordTy) {
// If this class is not dynamic, then there is no vtable info to collect.
if (!RD->isDynamicClass())
return;
// Don't emit any vtable shape or vptr info if this class doesn't have an
// extendable vfptr. This can happen if the class doesn't have virtual
// methods, or in the MS ABI if those virtual methods only come from virtually
// inherited bases.
const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
if (!RL.hasExtendableVFPtr())
return;
// CodeView needs to know how large the vtable of every dynamic class is, so
// emit a special named pointer type into the element list. The vptr type
// points to this type as well.
llvm::DIType *VPtrTy = nullptr;
bool NeedVTableShape = CGM.getCodeGenOpts().EmitCodeView &&
CGM.getTarget().getCXXABI().isMicrosoft();
if (NeedVTableShape) {
uint64_t PtrWidth =
CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
const VTableLayout &VFTLayout =
CGM.getMicrosoftVTableContext().getVFTableLayout(RD, CharUnits::Zero());
unsigned VSlotCount =
VFTLayout.vtable_components().size() - CGM.getLangOpts().RTTIData;
unsigned VTableWidth = PtrWidth * VSlotCount;
unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace();
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
// Create a very wide void* type and insert it directly in the element list.
llvm::DIType *VTableType = DBuilder.createPointerType(
nullptr, VTableWidth, 0, DWARFAddressSpace, "__vtbl_ptr_type");
EltTys.push_back(VTableType);
// The vptr is a pointer to this special vtable type.
VPtrTy = DBuilder.createPointerType(VTableType, PtrWidth);
}
// If there is a primary base then the artificial vptr member lives there.
if (RL.getPrimaryBase())
return;
if (!VPtrTy)
VPtrTy = getOrCreateVTablePtrType(Unit);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
llvm::DIType *VPtrMember =
DBuilder.createMemberType(Unit, getVTableName(RD), Unit, 0, Size, 0, 0,
llvm::DINode::FlagArtificial, VPtrTy);
EltTys.push_back(VPtrMember);
}
llvm::DIType *CGDebugInfo::getOrCreateRecordType(QualType RTy,
SourceLocation Loc) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
llvm::DIType *T = getOrCreateType(RTy, getOrCreateFile(Loc));
return T;
}
llvm::DIType *CGDebugInfo::getOrCreateInterfaceType(QualType D,
SourceLocation Loc) {
return getOrCreateStandaloneType(D, Loc);
}
llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D,
SourceLocation Loc) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
assert(!D.isNull() && "null type");
llvm::DIType *T = getOrCreateType(D, getOrCreateFile(Loc));
assert(T && "could not create debug info for type");
RetainedTypes.push_back(D.getAsOpaquePtr());
return T;
}
void CGDebugInfo::completeType(const EnumDecl *ED) {
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
QualType Ty = CGM.getContext().getEnumType(ED);
void *TyPtr = Ty.getAsOpaquePtr();
auto I = TypeCache.find(TyPtr);
if (I == TypeCache.end() || !cast<llvm::DIType>(I->second)->isForwardDecl())
return;
llvm::DIType *Res = CreateTypeDefinition(Ty->castAs<EnumType>());
assert(!Res->isForwardDecl());
TypeCache[TyPtr].reset(Res);
}
void CGDebugInfo::completeType(const RecordDecl *RD) {
if (DebugKind > codegenoptions::LimitedDebugInfo ||
!CGM.getLangOpts().CPlusPlus)
completeRequiredType(RD);
}
/// Return true if the class or any of its methods are marked dllimport.
static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) {
if (RD->hasAttr<DLLImportAttr>())
return true;
for (const CXXMethodDecl *MD : RD->methods())
if (MD->hasAttr<DLLImportAttr>())
return true;
return false;
}
/// Does a type definition exist in an imported clang module?
static bool isDefinedInClangModule(const RecordDecl *RD) {
// Only definitions that were imported from an AST file come from a module.
if (!RD || !RD->isFromASTFile())
return false;
// Anonymous entities cannot be addressed. Treat them as not from module.
if (!RD->isExternallyVisible() && RD->getName().empty())
return false;
if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) {
if (!CXXDecl->isCompleteDefinition())
return false;
// Check whether RD is a template.
auto TemplateKind = CXXDecl->getTemplateSpecializationKind();
if (TemplateKind != TSK_Undeclared) {
// Unfortunately getOwningModule() isn't accurate enough to find the
// owning module of a ClassTemplateSpecializationDecl that is inside a
// namespace spanning multiple modules.
bool Explicit = false;
if (auto *TD = dyn_cast<ClassTemplateSpecializationDecl>(CXXDecl))
Explicit = TD->isExplicitInstantiationOrSpecialization();
if (!Explicit && CXXDecl->getEnclosingNamespaceContext())
return false;
// This is a template, check the origin of the first member.
if (CXXDecl->field_begin() == CXXDecl->field_end())
return TemplateKind == TSK_ExplicitInstantiationDeclaration;
if (!CXXDecl->field_begin()->isFromASTFile())
return false;
}
}
return true;
}
void CGDebugInfo::completeClassData(const RecordDecl *RD) {
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
if (CXXRD->isDynamicClass() &&
CGM.getVTableLinkage(CXXRD) ==
llvm::GlobalValue::AvailableExternallyLinkage &&
!isClassOrMethodDLLImport(CXXRD))
return;
if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition()))
return;
completeClass(RD);
}
void CGDebugInfo::completeClass(const RecordDecl *RD) {
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
QualType Ty = CGM.getContext().getRecordType(RD);
void *TyPtr = Ty.getAsOpaquePtr();
auto I = TypeCache.find(TyPtr);
if (I != TypeCache.end() && !cast<llvm::DIType>(I->second)->isForwardDecl())
return;
llvm::DIType *Res = CreateTypeDefinition(Ty->castAs<RecordType>());
assert(!Res->isForwardDecl());
TypeCache[TyPtr].reset(Res);
}
static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I,
CXXRecordDecl::method_iterator End) {
for (CXXMethodDecl *MD : llvm::make_range(I, End))
if (FunctionDecl *Tmpl = MD->getInstantiatedFromMemberFunction())
if (!Tmpl->isImplicit() && Tmpl->isThisDeclarationADefinition() &&
!MD->getMemberSpecializationInfo()->isExplicitSpecialization())
return true;
return false;
}
static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind,
bool DebugTypeExtRefs, const RecordDecl *RD,
const LangOptions &LangOpts) {
if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition()))
return true;
if (auto *ES = RD->getASTContext().getExternalSource())
if (ES->hasExternalDefinitions(RD) == ExternalASTSource::EK_Always)
return true;
if (DebugKind > codegenoptions::LimitedDebugInfo)
return false;
if (!LangOpts.CPlusPlus)
return false;
if (!RD->isCompleteDefinitionRequired())
return true;
const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD);
if (!CXXDecl)
return false;
// Only emit complete debug info for a dynamic class when its vtable is
// emitted. However, Microsoft debuggers don't resolve type information
// across DLL boundaries, so skip this optimization if the class or any of its
// methods are marked dllimport. This isn't a complete solution, since objects
// without any dllimport methods can be used in one DLL and constructed in
// another, but it is the current behavior of LimitedDebugInfo.
if (CXXDecl->hasDefinition() && CXXDecl->isDynamicClass() &&
!isClassOrMethodDLLImport(CXXDecl))
return true;
TemplateSpecializationKind Spec = TSK_Undeclared;
if (const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD))
Spec = SD->getSpecializationKind();
if (Spec == TSK_ExplicitInstantiationDeclaration &&
hasExplicitMemberDefinition(CXXDecl->method_begin(),
CXXDecl->method_end()))
return true;
return false;
}
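// For example (illustrative): under -flimit-debug-info, a dynamic class
// whose vtable is only available externally is emitted here as a forward
// declaration, on the assumption that the TU emitting the vtable also
// emits the full type; the dllimport check above opts out of this for
// Microsoft debuggers.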
void CGDebugInfo::completeRequiredType(const RecordDecl *RD) {
if (shouldOmitDefinition(DebugKind, DebugTypeExtRefs, RD, CGM.getLangOpts()))
return;
QualType Ty = CGM.getContext().getRecordType(RD);
llvm::DIType *T = getTypeOrNull(Ty);
if (T && T->isForwardDecl())
completeClassData(RD);
}
llvm::DIType *CGDebugInfo::CreateType(const RecordType *Ty) {
RecordDecl *RD = Ty->getDecl();
llvm::DIType *T = cast_or_null<llvm::DIType>(getTypeOrNull(QualType(Ty, 0)));
if (T || shouldOmitDefinition(DebugKind, DebugTypeExtRefs, RD,
CGM.getLangOpts())) {
if (!T)
T = getOrCreateRecordFwdDecl(Ty, getDeclContextDescriptor(RD));
return T;
}
return CreateTypeDefinition(Ty);
}
llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) {
RecordDecl *RD = Ty->getDecl();
// Get overall information about the record type for the debug info.
llvm::DIFile *DefUnit = getOrCreateFile(RD->getLocation());
// Records and classes and unions can all be recursive. To handle them, we
// first generate a debug descriptor for the struct as a forward declaration.
// Then (if it is a definition) we go through and get debug info for all of
// its members. Finally, we create a descriptor for the complete type (which
// may refer to the forward decl if the struct is recursive) and replace all
// uses of the forward declaration with the final definition.
llvm::DICompositeType *FwdDecl = getOrCreateLimitedType(Ty, DefUnit);
const RecordDecl *D = RD->getDefinition();
if (!D || !D->isCompleteDefinition())
return FwdDecl;
if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD))
CollectContainingType(CXXDecl, FwdDecl);
// Push the struct on region stack.
LexicalBlockStack.emplace_back(&*FwdDecl);
RegionMap[Ty->getDecl()].reset(FwdDecl);
// Convert all the elements.
SmallVector<llvm::Metadata *, 16> EltTys;
// What about nested types?
// Note: The split of CXXDecl information here is intentional; the gdb
// tests depend on a certain ordering at printout. The debug information
// offsets are still correct if we merge them all together, though.
const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD);
if (CXXDecl) {
CollectCXXBases(CXXDecl, DefUnit, EltTys, FwdDecl);
CollectVTableInfo(CXXDecl, DefUnit, EltTys, FwdDecl);
}
// Collect data fields (including static variables and any initializers).
CollectRecordFields(RD, DefUnit, EltTys, FwdDecl);
if (CXXDecl)
CollectCXXMemberFunctions(CXXDecl, DefUnit, EltTys, FwdDecl);
LexicalBlockStack.pop_back();
RegionMap.erase(Ty->getDecl());
llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
DBuilder.replaceArrays(FwdDecl, Elements);
if (FwdDecl->isTemporary())
FwdDecl =
llvm::MDNode::replaceWithPermanent(llvm::TempDICompositeType(FwdDecl));
RegionMap[Ty->getDecl()].reset(FwdDecl);
return FwdDecl;
}
llvm::DIType *CGDebugInfo::CreateType(const ObjCObjectType *Ty,
llvm::DIFile *Unit) {
// Ignore protocols.
return getOrCreateType(Ty->getBaseType(), Unit);
}
llvm::DIType *CGDebugInfo::CreateType(const ObjCTypeParamType *Ty,
llvm::DIFile *Unit) {
// Ignore protocols.
SourceLocation Loc = Ty->getDecl()->getLocation();
// Use Typedefs to represent ObjCTypeParamType.
return DBuilder.createTypedef(
getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit),
Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc),
getDeclContextDescriptor(Ty->getDecl()));
}
/// \return true if Getter has the default name for the property PD.
static bool hasDefaultGetterName(const ObjCPropertyDecl *PD,
const ObjCMethodDecl *Getter) {
assert(PD);
if (!Getter)
return true;
assert(Getter->getDeclName().isObjCZeroArgSelector());
return PD->getName() ==
Getter->getDeclName().getObjCSelector().getNameForSlot(0);
}
/// \return true if Setter has the default name for the property PD.
static bool hasDefaultSetterName(const ObjCPropertyDecl *PD,
const ObjCMethodDecl *Setter) {
assert(PD);
if (!Setter)
return true;
assert(Setter->getDeclName().isObjCOneArgSelector());
return SelectorTable::constructSetterName(PD->getName()) ==
Setter->getDeclName().getObjCSelector().getNameForSlot(0);
}
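// For example (illustrative): a property named `foo` has the default getter
// selector `foo` and the default setter selector `setFoo:`; only selectors
// that deviate from these defaults are recorded on the DIObjCProperty
// below.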
llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty,
llvm::DIFile *Unit) {
ObjCInterfaceDecl *ID = Ty->getDecl();
if (!ID)
return nullptr;
// Return a forward declaration if this type was imported from a clang module,
// and this is not the compile unit with the implementation of the type (which
// may contain hidden ivars).
if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition() &&
!ID->getImplementation())
return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
ID->getName(),
getDeclContextDescriptor(ID), Unit, 0);
// Get overall information about the record type for the debug info.
llvm::DIFile *DefUnit = getOrCreateFile(ID->getLocation());
unsigned Line = getLineNumber(ID->getLocation());
auto RuntimeLang =
static_cast<llvm::dwarf::SourceLanguage>(TheCU->getSourceLanguage());
// If this is just a forward declaration, return a special
// forward-declaration debug type since we won't be able to lay out the
// entire type.
ObjCInterfaceDecl *Def = ID->getDefinition();
if (!Def || !Def->getImplementation()) {
llvm::DIScope *Mod = getParentModuleOrNull(ID);
llvm::DIType *FwdDecl = DBuilder.createReplaceableCompositeType(
llvm::dwarf::DW_TAG_structure_type, ID->getName(), Mod ? Mod : TheCU,
DefUnit, Line, RuntimeLang);
ObjCInterfaceCache.push_back(ObjCInterfaceCacheEntry(Ty, FwdDecl, Unit));
return FwdDecl;
}
return CreateTypeDefinition(Ty, Unit);
}
llvm::DIModule *
CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
bool CreateSkeletonCU) {
// Use the Module pointer as the key into the cache. This is a
// nullptr if the "Module" is a PCH, which is safe because we don't
// support chained PCH debug info, so there can only be a single PCH.
const Module *M = Mod.getModuleOrNull();
auto ModRef = ModuleCache.find(M);
if (ModRef != ModuleCache.end())
return cast<llvm::DIModule>(ModRef->second);
// Macro definitions and undefinitions that were passed with "-D" or "-U"
// on the command line.
SmallString<128> ConfigMacros;
{
llvm::raw_svector_ostream OS(ConfigMacros);
const auto &PPOpts = CGM.getPreprocessorOpts();
unsigned I = 0;
// Translate the macro definitions back into a command line.
for (auto &M : PPOpts.Macros) {
if (++I > 1)
OS << " ";
const std::string &Macro = M.first;
bool Undef = M.second;
OS << "\"-" << (Undef ? 'U' : 'D');
for (char c : Macro)
switch (c) {
case '\\':
OS << "\\\\";
break;
case '"':
OS << "\\\"";
break;
default:
OS << c;
}
OS << '\"';
}
}
bool IsRootModule = M ? !M->Parent : true;
if (CreateSkeletonCU && IsRootModule) {
// PCH files don't have a signature field in the control block,
// but LLVM detects skeleton CUs by looking for a non-zero DWO id.
// We use the lower 64 bits for debug info.
uint64_t Signature =
Mod.getSignature()
? (uint64_t)Mod.getSignature()[1] << 32 | Mod.getSignature()[0]
: ~1ULL;
llvm::DIBuilder DIB(CGM.getModule());
DIB.createCompileUnit(TheCU->getSourceLanguage(),
// TODO: Support "Source" from external AST providers?
DIB.createFile(Mod.getModuleName(), Mod.getPath()),
TheCU->getProducer(), true, StringRef(), 0,
Mod.getASTFile(), llvm::DICompileUnit::FullDebug,
Signature);
DIB.finalize();
}
llvm::DIModule *Parent =
IsRootModule ? nullptr
: getOrCreateModuleRef(
ExternalASTSource::ASTSourceDescriptor(*M->Parent),
CreateSkeletonCU);
llvm::DIModule *DIMod =
DBuilder.createModule(Parent, Mod.getModuleName(), ConfigMacros,
Mod.getPath(), CGM.getHeaderSearchOpts().Sysroot);
ModuleCache[M].reset(DIMod);
return DIMod;
}
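// For example (illustrative): invoking the compiler with `-DFOO=1 -UBAR`
// produces the ConfigMacros string `"-DFOO=1" "-UBAR"` via the loop above,
// with backslashes and double quotes in macro bodies escaped.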
llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
llvm::DIFile *Unit) {
ObjCInterfaceDecl *ID = Ty->getDecl();
llvm::DIFile *DefUnit = getOrCreateFile(ID->getLocation());
unsigned Line = getLineNumber(ID->getLocation());
unsigned RuntimeLang = TheCU->getSourceLanguage();
// Bit size, align and offset of the type.
uint64_t Size = CGM.getContext().getTypeSize(Ty);
auto Align = getTypeAlignIfRequired(Ty, CGM.getContext());
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
if (ID->getImplementation())
Flags |= llvm::DINode::FlagObjcClassComplete;
llvm::DIScope *Mod = getParentModuleOrNull(ID);
llvm::DICompositeType *RealDecl = DBuilder.createStructType(
Mod ? Mod : Unit, ID->getName(), DefUnit, Line, Size, Align, Flags,
nullptr, llvm::DINodeArray(), RuntimeLang);
QualType QTy(Ty, 0);
TypeCache[QTy.getAsOpaquePtr()].reset(RealDecl);
// Push the struct on region stack.
LexicalBlockStack.emplace_back(RealDecl);
RegionMap[Ty->getDecl()].reset(RealDecl);
// Convert all the elements.
SmallVector<llvm::Metadata *, 16> EltTys;
ObjCInterfaceDecl *SClass = ID->getSuperClass();
if (SClass) {
llvm::DIType *SClassTy =
getOrCreateType(CGM.getContext().getObjCInterfaceType(SClass), Unit);
if (!SClassTy)
return nullptr;
llvm::DIType *InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0, 0,
llvm::DINode::FlagZero);
EltTys.push_back(InhTag);
}
// Create entries for all of the properties.
auto AddProperty = [&](const ObjCPropertyDecl *PD) {
SourceLocation Loc = PD->getLocation();
llvm::DIFile *PUnit = getOrCreateFile(Loc);
unsigned PLine = getLineNumber(Loc);
ObjCMethodDecl *Getter = PD->getGetterMethodDecl();
ObjCMethodDecl *Setter = PD->getSetterMethodDecl();
llvm::MDNode *PropertyNode = DBuilder.createObjCProperty(
PD->getName(), PUnit, PLine,
hasDefaultGetterName(PD, Getter) ? ""
: getSelectorName(PD->getGetterName()),
hasDefaultSetterName(PD, Setter) ? ""
: getSelectorName(PD->getSetterName()),
PD->getPropertyAttributes(), getOrCreateType(PD->getType(), PUnit));
EltTys.push_back(PropertyNode);
};
{
llvm::SmallPtrSet<const IdentifierInfo *, 16> PropertySet;
for (const ObjCCategoryDecl *ClassExt : ID->known_extensions())
for (auto *PD : ClassExt->properties()) {
PropertySet.insert(PD->getIdentifier());
AddProperty(PD);
}
for (const auto *PD : ID->properties()) {
// Don't emit duplicate metadata for properties that were already in a
// class extension.
if (!PropertySet.insert(PD->getIdentifier()).second)
continue;
AddProperty(PD);
}
}
const ASTRecordLayout &RL = CGM.getContext().getASTObjCInterfaceLayout(ID);
unsigned FieldNo = 0;
for (ObjCIvarDecl *Field = ID->all_declared_ivar_begin(); Field;
Field = Field->getNextIvar(), ++FieldNo) {
llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit);
if (!FieldTy)
return nullptr;
StringRef FieldName = Field->getName();
// Ignore unnamed fields.
if (FieldName.empty())
continue;
// Get the location for the field.
llvm::DIFile *FieldDefUnit = getOrCreateFile(Field->getLocation());
unsigned FieldLine = getLineNumber(Field->getLocation());
QualType FType = Field->getType();
uint64_t FieldSize = 0;
uint32_t FieldAlign = 0;
if (!FType->isIncompleteArrayType()) {
// Bit size, align and offset of the type.
FieldSize = Field->isBitField()
? Field->getBitWidthValue(CGM.getContext())
: CGM.getContext().getTypeSize(FType);
FieldAlign = getTypeAlignIfRequired(FType, CGM.getContext());
}
uint64_t FieldOffset;
if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) {
// We don't know the runtime offset of an ivar if we're using the
// non-fragile ABI. For bitfields, use the bit offset into the first
// byte of storage of the bitfield. For other fields, use zero.
if (Field->isBitField()) {
FieldOffset =
CGM.getObjCRuntime().ComputeBitfieldBitOffset(CGM, ID, Field);
FieldOffset %= CGM.getContext().getCharWidth();
} else {
FieldOffset = 0;
}
} else {
FieldOffset = RL.getFieldOffset(FieldNo);
}
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
if (Field->getAccessControl() == ObjCIvarDecl::Protected)
Flags = llvm::DINode::FlagProtected;
else if (Field->getAccessControl() == ObjCIvarDecl::Private)
Flags = llvm::DINode::FlagPrivate;
else if (Field->getAccessControl() == ObjCIvarDecl::Public)
Flags = llvm::DINode::FlagPublic;
llvm::MDNode *PropertyNode = nullptr;
if (ObjCImplementationDecl *ImpD = ID->getImplementation()) {
if (ObjCPropertyImplDecl *PImpD =
ImpD->FindPropertyImplIvarDecl(Field->getIdentifier())) {
if (ObjCPropertyDecl *PD = PImpD->getPropertyDecl()) {
SourceLocation Loc = PD->getLocation();
llvm::DIFile *PUnit = getOrCreateFile(Loc);
unsigned PLine = getLineNumber(Loc);
ObjCMethodDecl *Getter = PD->getGetterMethodDecl();
ObjCMethodDecl *Setter = PD->getSetterMethodDecl();
PropertyNode = DBuilder.createObjCProperty(
PD->getName(), PUnit, PLine,
hasDefaultGetterName(PD, Getter)
? ""
: getSelectorName(PD->getGetterName()),
hasDefaultSetterName(PD, Setter)
? ""
: getSelectorName(PD->getSetterName()),
PD->getPropertyAttributes(),
getOrCreateType(PD->getType(), PUnit));
}
}
}
FieldTy = DBuilder.createObjCIVar(FieldName, FieldDefUnit, FieldLine,
FieldSize, FieldAlign, FieldOffset, Flags,
FieldTy, PropertyNode);
EltTys.push_back(FieldTy);
}
llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
DBuilder.replaceArrays(RealDecl, Elements);
LexicalBlockStack.pop_back();
return RealDecl;
}
llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty,
llvm::DIFile *Unit) {
llvm::DIType *ElementTy = getOrCreateType(Ty->getElementType(), Unit);
int64_t Count = Ty->getNumElements();
llvm::Metadata *Subscript;
QualType QTy(Ty, 0);
auto SizeExpr = SizeExprCache.find(QTy);
if (SizeExpr != SizeExprCache.end())
Subscript = DBuilder.getOrCreateSubrange(0, SizeExpr->getSecond());
else
Subscript = DBuilder.getOrCreateSubrange(0, Count ? Count : -1);
llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
uint64_t Size = CGM.getContext().getTypeSize(Ty);
auto Align = getTypeAlignIfRequired(Ty, CGM.getContext());
return DBuilder.createVectorType(Size, Align, ElementTy, SubscriptArray);
}
llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
uint64_t Size;
uint32_t Align;
// FIXME: make getTypeAlign() aware of VLAs and incomplete array types
if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) {
Size = 0;
Align = getTypeAlignIfRequired(CGM.getContext().getBaseElementType(VAT),
CGM.getContext());
} else if (Ty->isIncompleteArrayType()) {
Size = 0;
if (Ty->getElementType()->isIncompleteType())
Align = 0;
else
Align = getTypeAlignIfRequired(Ty->getElementType(), CGM.getContext());
} else if (Ty->isIncompleteType()) {
Size = 0;
Align = 0;
} else {
// Size and align of the whole array, not the element type.
Size = CGM.getContext().getTypeSize(Ty);
Align = getTypeAlignIfRequired(Ty, CGM.getContext());
}
// Add the dimensions of the array. FIXME: This loses CV qualifiers from
// interior arrays; do we care? Why aren't nested arrays represented the
// obvious/recursive way?
SmallVector<llvm::Metadata *, 8> Subscripts;
QualType EltTy(Ty, 0);
while ((Ty = dyn_cast<ArrayType>(EltTy))) {
// If the number of elements is known, then count is that number. Otherwise,
// it's -1. This allows us to represent a subrange with an array of 0
// elements, like this:
//
// struct foo {
// int x[0];
// };
int64_t Count = -1; // Count == -1 is an unbounded array.
if (const auto *CAT = dyn_cast<ConstantArrayType>(Ty))
Count = CAT->getSize().getZExtValue();
else if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) {
if (Expr *Size = VAT->getSizeExpr()) {
Expr::EvalResult Result;
if (Size->EvaluateAsInt(Result, CGM.getContext()))
Count = Result.Val.getInt().getExtValue();
}
}
auto SizeNode = SizeExprCache.find(EltTy);
if (SizeNode != SizeExprCache.end())
Subscripts.push_back(
DBuilder.getOrCreateSubrange(0, SizeNode->getSecond()));
else
Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count));
EltTy = Ty->getElementType();
}
llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscripts);
return DBuilder.createArrayType(Size, Align, getOrCreateType(EltTy, Unit),
SubscriptArray);
}
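// For example (illustrative): `int x[2][3]` yields an array type with two
// subranges of counts 2 and 3, while an incomplete array such as
// `extern int y[];` yields a single subrange with count -1 to mark the
// unbounded dimension.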
llvm::DIType *CGDebugInfo::CreateType(const LValueReferenceType *Ty,
llvm::DIFile *Unit) {
return CreatePointerLikeType(llvm::dwarf::DW_TAG_reference_type, Ty,
Ty->getPointeeType(), Unit);
}
llvm::DIType *CGDebugInfo::CreateType(const RValueReferenceType *Ty,
llvm::DIFile *Unit) {
return CreatePointerLikeType(llvm::dwarf::DW_TAG_rvalue_reference_type, Ty,
Ty->getPointeeType(), Unit);
}
llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
llvm::DIFile *U) {
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
uint64_t Size = 0;
if (!Ty->isIncompleteType()) {
Size = CGM.getContext().getTypeSize(Ty);
// Set the MS inheritance model. There is no flag for the unspecified model.
if (CGM.getTarget().getCXXABI().isMicrosoft()) {
switch (Ty->getMostRecentCXXRecordDecl()->getMSInheritanceModel()) {
case MSInheritanceAttr::Keyword_single_inheritance:
Flags |= llvm::DINode::FlagSingleInheritance;
break;
case MSInheritanceAttr::Keyword_multiple_inheritance:
Flags |= llvm::DINode::FlagMultipleInheritance;
break;
case MSInheritanceAttr::Keyword_virtual_inheritance:
Flags |= llvm::DINode::FlagVirtualInheritance;
break;
case MSInheritanceAttr::Keyword_unspecified_inheritance:
break;
}
}
}
llvm::DIType *ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U);
if (Ty->isMemberDataPointerType())
return DBuilder.createMemberPointerType(
getOrCreateType(Ty->getPointeeType(), U), ClassType, Size, /*Align=*/0,
Flags);
const FunctionProtoType *FPT =
Ty->getPointeeType()->getAs<FunctionProtoType>();
return DBuilder.createMemberPointerType(
getOrCreateInstanceMethodType(
CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()),
FPT, U),
ClassType, Size, /*Align=*/0, Flags);
}
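// Illustrative sketch (editorial example): given
//
//   struct S { int n; void f(int); };
//
// 'int S::*' is emitted as a member pointer whose pointee type is 'int' and
// whose containing class is 'S', while 'void (S::*)(int)' is emitted with an
// instance method type that includes the implicit 'this' parameter. Under
// the Microsoft C++ ABI the inheritance model of 'S' is also recorded in
// Flags.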
llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) {
auto *FromTy = getOrCreateType(Ty->getValueType(), U);
return DBuilder.createQualifiedType(llvm::dwarf::DW_TAG_atomic_type, FromTy);
}
llvm::DIType *CGDebugInfo::CreateType(const PipeType *Ty, llvm::DIFile *U) {
return getOrCreateType(Ty->getElementType(), U);
}
llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) {
const EnumDecl *ED = Ty->getDecl();
uint64_t Size = 0;
uint32_t Align = 0;
if (!ED->getTypeForDecl()->isIncompleteType()) {
Size = CGM.getContext().getTypeSize(ED->getTypeForDecl());
Align = getDeclAlignIfRequired(ED, CGM.getContext());
}
SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
bool isImportedFromModule =
DebugTypeExtRefs && ED->isFromASTFile() && ED->getDefinition();
// If this is just a forward declaration, construct an appropriately
// marked node and just return it.
if (isImportedFromModule || !ED->getDefinition()) {
// Note that it is possible for enums to be created as part of
// their own DeclContext. In this case a FwdDecl will be created
// twice. This doesn't cause a problem because both FwdDecls are
// entered into the ReplaceMap: finalize() will replace the first
// FwdDecl with the second and then replace the second with the
// complete type.
llvm::DIScope *EDContext = getDeclContextDescriptor(ED);
llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation());
llvm::TempDIScope TmpContext(DBuilder.createReplaceableCompositeType(
llvm::dwarf::DW_TAG_enumeration_type, "", TheCU, DefUnit, 0));
unsigned Line = getLineNumber(ED->getLocation());
StringRef EDName = ED->getName();
llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType(
llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line,
0, Size, Align, llvm::DINode::FlagFwdDecl, Identifier);
ReplaceMap.emplace_back(
std::piecewise_construct, std::make_tuple(Ty),
std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
return RetTy;
}
return CreateTypeDefinition(Ty);
}
llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
const EnumDecl *ED = Ty->getDecl();
uint64_t Size = 0;
uint32_t Align = 0;
if (!ED->getTypeForDecl()->isIncompleteType()) {
Size = CGM.getContext().getTypeSize(ED->getTypeForDecl());
Align = getDeclAlignIfRequired(ED, CGM.getContext());
}
SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
// Create elements for each enumerator.
SmallVector<llvm::Metadata *, 16> Enumerators;
ED = ED->getDefinition();
bool IsSigned = ED->getIntegerType()->isSignedIntegerType();
for (const auto *Enum : ED->enumerators()) {
const auto &InitVal = Enum->getInitVal();
auto Value = IsSigned ? InitVal.getSExtValue() : InitVal.getZExtValue();
Enumerators.push_back(
DBuilder.createEnumerator(Enum->getName(), Value, !IsSigned));
}
// Return a CompositeType for the enum itself.
llvm::DINodeArray EltArray = DBuilder.getOrCreateArray(Enumerators);
llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation());
unsigned Line = getLineNumber(ED->getLocation());
llvm::DIScope *EnumContext = getDeclContextDescriptor(ED);
llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit);
return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit,
Line, Size, Align, EltArray, ClassTy,
Identifier, ED->isScoped());
}
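// Illustrative sketch (editorial example): for
//
//   enum class Color : short { Red = -1, Green = 0 };
//
// each enumerator value is sign-extended because the underlying type
// 'short' is signed, 'short' itself becomes the ClassTy of the
// DW_TAG_enumeration_type, and the scoped-ness is recorded from
// ED->isScoped().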
llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
unsigned MType, SourceLocation LineLoc,
StringRef Name, StringRef Value) {
unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc);
return DBuilder.createMacro(Parent, Line, MType, Name, Value);
}
llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent,
SourceLocation LineLoc,
SourceLocation FileLoc) {
llvm::DIFile *FName = getOrCreateFile(FileLoc);
unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc);
return DBuilder.createTempMacroFile(Parent, Line, FName);
}
static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) {
Qualifiers Quals;
do {
Qualifiers InnerQuals = T.getLocalQualifiers();
// Qualifiers::operator+() doesn't like it if you add a Qualifier
// that is already there.
Quals += Qualifiers::removeCommonQualifiers(Quals, InnerQuals);
Quals += InnerQuals;
QualType LastT = T;
switch (T->getTypeClass()) {
default:
return C.getQualifiedType(T.getTypePtr(), Quals);
case Type::TemplateSpecialization: {
const auto *Spec = cast<TemplateSpecializationType>(T);
if (Spec->isTypeAlias())
return C.getQualifiedType(T.getTypePtr(), Quals);
T = Spec->desugar();
break;
}
case Type::TypeOfExpr:
T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType();
break;
case Type::TypeOf:
T = cast<TypeOfType>(T)->getUnderlyingType();
break;
case Type::Decltype:
T = cast<DecltypeType>(T)->getUnderlyingType();
break;
case Type::UnaryTransform:
T = cast<UnaryTransformType>(T)->getUnderlyingType();
break;
case Type::Attributed:
T = cast<AttributedType>(T)->getEquivalentType();
break;
case Type::Elaborated:
T = cast<ElaboratedType>(T)->getNamedType();
break;
case Type::Paren:
T = cast<ParenType>(T)->getInnerType();
break;
case Type::SubstTemplateTypeParm:
T = cast<SubstTemplateTypeParmType>(T)->getReplacementType();
break;
case Type::Auto:
case Type::DeducedTemplateSpecialization: {
QualType DT = cast<DeducedType>(T)->getDeducedType();
assert(!DT.isNull() && "Undeduced types shouldn't reach here.");
T = DT;
break;
}
case Type::Adjusted:
case Type::Decayed:
// Decayed and adjusted types use the adjusted type in LLVM and DWARF.
T = cast<AdjustedType>(T)->getAdjustedType();
break;
}
assert(T != LastT && "Type unwrapping failed to unwrap!");
(void)LastT;
} while (true);
}
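// Illustrative sketch (editorial example): given
//
//   const int n = 0;
//   volatile decltype(n) v = 0;
//
// the type of 'v' unwraps to 'const volatile int': the Decltype node is
// replaced by its underlying type while the qualifiers collected along the
// way are reapplied to the result. Typedefs, by contrast, hit the
// 'default:' case and are preserved as-is.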
llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) {
// Unwrap the type as needed for debug information.
Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
auto It = TypeCache.find(Ty.getAsOpaquePtr());
if (It != TypeCache.end()) {
// Verify that the debug info still exists.
if (llvm::Metadata *V = It->second)
return cast<llvm::DIType>(V);
}
return nullptr;
}
void CGDebugInfo::completeTemplateDefinition(
const ClassTemplateSpecializationDecl &SD) {
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
completeUnusedClass(SD);
}
void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) {
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
completeClassData(&D);
// In case this type has no member function definitions being emitted, ensure
// it is retained.
RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr());
}
llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) {
if (Ty.isNull())
return nullptr;
// Unwrap the type as needed for debug information.
Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
if (auto *T = getTypeOrNull(Ty))
return T;
llvm::DIType *Res = CreateTypeNode(Ty, Unit);
void *TyPtr = Ty.getAsOpaquePtr();
// And update the type cache.
TypeCache[TyPtr].reset(Res);
return Res;
}
llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) {
// A forward declaration inside a module header does not belong to the module.
if (isa<RecordDecl>(D) && !cast<RecordDecl>(D)->getDefinition())
return nullptr;
if (DebugTypeExtRefs && D->isFromASTFile()) {
// Record a reference to an imported clang module or precompiled header.
auto *Reader = CGM.getContext().getExternalSource();
auto Idx = D->getOwningModuleID();
auto Info = Reader->getSourceDescriptor(Idx);
if (Info)
return getOrCreateModuleRef(*Info, /*SkeletonCU=*/true);
} else if (ClangModuleMap) {
// We are building a clang module or a precompiled header.
//
// TODO: When D is a CXXRecordDecl or a C++ Enum, the ODR applies
// and it wouldn't be necessary to specify the parent scope
// because the type is already unique by definition (it would look
// like the output of -fno-standalone-debug). On the other hand,
// the parent scope helps a consumer to quickly locate the object
// file where the type's definition is located, so it might be
// best to make this behavior a command line or debugger tuning
// option.
if (Module *M = D->getOwningModule()) {
// This is a (sub-)module.
auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
return getOrCreateModuleRef(Info, /*SkeletonCU=*/false);
} else {
// This is the precompiled header being built.
return getOrCreateModuleRef(PCHDescriptor, /*SkeletonCU=*/false);
}
}
return nullptr;
}
llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
// Handle qualifiers, which recursively handles what they refer to.
if (Ty.hasLocalQualifiers())
return CreateQualifiedType(Ty, Unit);
// Work out details of type.
switch (Ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.def"
llvm_unreachable("Dependent types cannot show up in debug information");
case Type::ExtVector:
case Type::Vector:
return CreateType(cast<VectorType>(Ty), Unit);
case Type::ObjCObjectPointer:
return CreateType(cast<ObjCObjectPointerType>(Ty), Unit);
case Type::ObjCObject:
return CreateType(cast<ObjCObjectType>(Ty), Unit);
case Type::ObjCTypeParam:
return CreateType(cast<ObjCTypeParamType>(Ty), Unit);
case Type::ObjCInterface:
return CreateType(cast<ObjCInterfaceType>(Ty), Unit);
case Type::Builtin:
return CreateType(cast<BuiltinType>(Ty));
case Type::Complex:
return CreateType(cast<ComplexType>(Ty));
case Type::Pointer:
return CreateType(cast<PointerType>(Ty), Unit);
case Type::BlockPointer:
return CreateType(cast<BlockPointerType>(Ty), Unit);
case Type::Typedef:
return CreateType(cast<TypedefType>(Ty), Unit);
case Type::Record:
return CreateType(cast<RecordType>(Ty));
case Type::Enum:
return CreateEnumType(cast<EnumType>(Ty));
case Type::FunctionProto:
case Type::FunctionNoProto:
return CreateType(cast<FunctionType>(Ty), Unit);
case Type::ConstantArray:
case Type::VariableArray:
case Type::IncompleteArray:
return CreateType(cast<ArrayType>(Ty), Unit);
case Type::LValueReference:
return CreateType(cast<LValueReferenceType>(Ty), Unit);
case Type::RValueReference:
return CreateType(cast<RValueReferenceType>(Ty), Unit);
case Type::MemberPointer:
return CreateType(cast<MemberPointerType>(Ty), Unit);
case Type::Atomic:
return CreateType(cast<AtomicType>(Ty), Unit);
case Type::Pipe:
return CreateType(cast<PipeType>(Ty), Unit);
case Type::TemplateSpecialization:
return CreateType(cast<TemplateSpecializationType>(Ty), Unit);
case Type::Auto:
case Type::Attributed:
case Type::Adjusted:
case Type::Decayed:
case Type::DeducedTemplateSpecialization:
case Type::Elaborated:
case Type::Paren:
case Type::SubstTemplateTypeParm:
case Type::TypeOfExpr:
case Type::TypeOf:
case Type::Decltype:
case Type::UnaryTransform:
case Type::PackExpansion:
break;
}
llvm_unreachable("type should have been unwrapped!");
}
llvm::DICompositeType *CGDebugInfo::getOrCreateLimitedType(const RecordType *Ty,
llvm::DIFile *Unit) {
QualType QTy(Ty, 0);
auto *T = cast_or_null<llvm::DICompositeType>(getTypeOrNull(QTy));
// We may have cached a forward decl when we could have created
// a non-forward decl. Go ahead and create a non-forward decl
// now.
if (T && !T->isForwardDecl())
return T;
// Otherwise create the type.
llvm::DICompositeType *Res = CreateLimitedType(Ty);
// Propagate members from the declaration to the definition;
// CreateType(const RecordType*) will overwrite this with the members in the
// correct order if the full type is needed.
DBuilder.replaceArrays(Res, T ? T->getElements() : llvm::DINodeArray());
// And update the type cache.
TypeCache[QTy.getAsOpaquePtr()].reset(Res);
return Res;
}
// TODO: Currently used for context chains when limiting debug info.
llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
RecordDecl *RD = Ty->getDecl();
// Get overall information about the record type for the debug info.
llvm::DIFile *DefUnit = getOrCreateFile(RD->getLocation());
unsigned Line = getLineNumber(RD->getLocation());
StringRef RDName = getClassName(RD);
llvm::DIScope *RDContext = getDeclContextDescriptor(RD);
// If we ended up creating the type during the context chain construction,
// just return that.
auto *T = cast_or_null<llvm::DICompositeType>(
getTypeOrNull(CGM.getContext().getRecordType(RD)));
if (T && (!T->isForwardDecl() || !RD->getDefinition()))
return T;
// If this is just a forward or incomplete declaration, construct an
// appropriately marked node and just return it.
const RecordDecl *D = RD->getDefinition();
if (!D || !D->isCompleteDefinition())
return getOrCreateRecordFwdDecl(Ty, RDContext);
uint64_t Size = CGM.getContext().getTypeSize(Ty);
auto Align = getDeclAlignIfRequired(D, CGM.getContext());
SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
// Explicitly record the calling convention for C++ records.
auto Flags = llvm::DINode::FlagZero;
if (auto CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
if (CGM.getCXXABI().getRecordArgABI(CXXRD) == CGCXXABI::RAA_Indirect)
Flags |= llvm::DINode::FlagTypePassByReference;
else
Flags |= llvm::DINode::FlagTypePassByValue;
// Record if a C++ record is trivial type.
if (CXXRD->isTrivial())
Flags |= llvm::DINode::FlagTrivial;
}
llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align,
Flags, Identifier);
// Elements of composite types usually have back references to the type,
// creating uniquing cycles. Distinct nodes are more efficient.
switch (RealDecl->getTag()) {
default:
llvm_unreachable("invalid composite type tag");
case llvm::dwarf::DW_TAG_array_type:
case llvm::dwarf::DW_TAG_enumeration_type:
// Array elements and most enumeration elements don't have back references,
// so they don't tend to be involved in uniquing cycles and there is some
// chance of merging them when linking together two modules. Only make
// them distinct if they are ODR-uniqued.
if (Identifier.empty())
break;
LLVM_FALLTHROUGH;
case llvm::dwarf::DW_TAG_structure_type:
case llvm::dwarf::DW_TAG_union_type:
case llvm::dwarf::DW_TAG_class_type:
// Immediately resolve to a distinct node.
RealDecl =
llvm::MDNode::replaceWithDistinct(llvm::TempDICompositeType(RealDecl));
break;
}
RegionMap[Ty->getDecl()].reset(RealDecl);
TypeCache[QualType(Ty, 0).getAsOpaquePtr()].reset(RealDecl);
if (const auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD))
DBuilder.replaceArrays(RealDecl, llvm::DINodeArray(),
CollectCXXTemplateParams(TSpecial, DefUnit));
return RealDecl;
}
void CGDebugInfo::CollectContainingType(const CXXRecordDecl *RD,
llvm::DICompositeType *RealDecl) {
// A class's primary base or the class itself contains the vtable.
llvm::DICompositeType *ContainingType = nullptr;
const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
if (const CXXRecordDecl *PBase = RL.getPrimaryBase()) {
// Seek non-virtual primary base root.
while (1) {
const ASTRecordLayout &BRL = CGM.getContext().getASTRecordLayout(PBase);
const CXXRecordDecl *PBT = BRL.getPrimaryBase();
if (PBT && !BRL.isPrimaryBaseVirtual())
PBase = PBT;
else
break;
}
ContainingType = cast<llvm::DICompositeType>(
getOrCreateType(QualType(PBase->getTypeForDecl(), 0),
getOrCreateFile(RD->getLocation())));
} else if (RD->isDynamicClass())
ContainingType = RealDecl;
DBuilder.replaceVTableHolder(RealDecl, ContainingType);
}
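// Illustrative sketch (editorial example): for
//
//   struct A { virtual ~A(); };
//   struct B : A { };
//   struct C { virtual void f(); };
//
// B's vtable holder is A (its non-virtual primary base root), while C, a
// dynamic class with no primary base, is its own vtable holder.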
llvm::DIType *CGDebugInfo::CreateMemberType(llvm::DIFile *Unit, QualType FType,
StringRef Name, uint64_t *Offset) {
llvm::DIType *FieldTy = CGDebugInfo::getOrCreateType(FType, Unit);
uint64_t FieldSize = CGM.getContext().getTypeSize(FType);
auto FieldAlign = getTypeAlignIfRequired(FType, CGM.getContext());
llvm::DIType *Ty =
DBuilder.createMemberType(Unit, Name, Unit, 0, FieldSize, FieldAlign,
*Offset, llvm::DINode::FlagZero, FieldTy);
*Offset += FieldSize;
return Ty;
}
void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
StringRef &Name,
StringRef &LinkageName,
llvm::DIScope *&FDContext,
llvm::DINodeArray &TParamsArray,
llvm::DINode::DIFlags &Flags) {
const auto *FD = cast<FunctionDecl>(GD.getDecl());
Name = getFunctionName(FD);
// Use mangled name as linkage name for C/C++ functions.
if (FD->hasPrototype()) {
LinkageName = CGM.getMangledName(GD);
Flags |= llvm::DINode::FlagPrototyped;
}
// There is no need to replicate the linkage name if it is identical to the
// subprogram name, and no need to have it at all unless coverage is enabled,
// debug info is set to more than just line tables, or extra debug info is
// needed.
if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs &&
!CGM.getCodeGenOpts().EmitGcovNotes &&
!CGM.getCodeGenOpts().DebugInfoForProfiling &&
DebugKind <= codegenoptions::DebugLineTablesOnly))
LinkageName = StringRef();
if (DebugKind >= codegenoptions::LimitedDebugInfo) {
if (const NamespaceDecl *NSDecl =
dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
FDContext = getOrCreateNamespace(NSDecl);
else if (const RecordDecl *RDecl =
dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
llvm::DIScope *Mod = getParentModuleOrNull(RDecl);
FDContext = getContextDescriptor(RDecl, Mod ? Mod : TheCU);
}
// Check if it is a noreturn-marked function
if (FD->isNoReturn())
Flags |= llvm::DINode::FlagNoReturn;
// Collect template parameters.
TParamsArray = CollectFunctionTemplateParams(FD, Unit);
}
}
void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
unsigned &LineNo, QualType &T,
StringRef &Name, StringRef &LinkageName,
llvm::MDTuple *&TemplateParameters,
llvm::DIScope *&VDContext) {
Unit = getOrCreateFile(VD->getLocation());
LineNo = getLineNumber(VD->getLocation());
setLocation(VD->getLocation());
T = VD->getType();
if (T->isIncompleteArrayType()) {
// CodeGen turns int[] into int[1] so we'll do the same here.
llvm::APInt ConstVal(32, 1);
QualType ET = CGM.getContext().getAsArrayType(T)->getElementType();
T = CGM.getContext().getConstantArrayType(ET, ConstVal, ArrayType::Normal,
0);
}
Name = VD->getName();
if (VD->getDeclContext() && !isa<FunctionDecl>(VD->getDeclContext()) &&
!isa<ObjCMethodDecl>(VD->getDeclContext()))
LinkageName = CGM.getMangledName(VD);
if (LinkageName == Name)
LinkageName = StringRef();
if (isa<VarTemplateSpecializationDecl>(VD)) {
llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VD, &*Unit);
TemplateParameters = parameterNodes.get();
} else {
TemplateParameters = nullptr;
}
// Since we emit declarations (DW_AT_members) for static members, place the
// definition of those static members in the namespace in which they were
// declared in the source code (the lexical DeclContext).
// FIXME: Generalize this for even non-member global variables where the
// declaration and definition may have different lexical decl contexts, once
// we have support for emitting declarations of (non-member) global variables.
const DeclContext *DC = VD->isStaticDataMember() ? VD->getLexicalDeclContext()
: VD->getDeclContext();
// When a record type contains an in-line initialization of a static data
// member, and the record type is marked as __declspec(dllexport), an implicit
// definition of the member will be created in the record context. DWARF
// doesn't seem to have a nice way to describe this in a form that consumers
// are likely to understand, so fake the "normal" situation of a definition
// outside the class by putting it in the global scope.
if (DC->isRecord())
DC = CGM.getContext().getTranslationUnitDecl();
llvm::DIScope *Mod = getParentModuleOrNull(VD);
VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU);
}
llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
bool Stub) {
llvm::DINodeArray TParamsArray;
StringRef Name, LinkageName;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
SourceLocation Loc = GD.getDecl()->getLocation();
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray,
Flags);
// Every caller hands us a FunctionDecl, and FD is dereferenced
// unconditionally below, so use cast<> instead of a dyn_cast<> whose null
// result could never be handled.
auto *FD = cast<FunctionDecl>(GD.getDecl());
// Build function type.
SmallVector<QualType, 16> ArgTypes;
for (const ParmVarDecl *Parm : FD->parameters())
ArgTypes.push_back(Parm->getType());
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
QualType FnType = CGM.getContext().getFunctionType(
FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
if (!FD->isExternallyVisible())
SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
if (CGM.getLangOpts().Optimize)
SPFlags |= llvm::DISubprogram::SPFlagOptimized;
if (Stub) {
Flags |= getCallSiteRelatedAttrs();
SPFlags |= llvm::DISubprogram::SPFlagDefinition;
return DBuilder.createFunction(
DContext, Name, LinkageName, Unit, Line,
getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(FD));
}
llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl(
DContext, Name, LinkageName, Unit, Line,
getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(FD));
const FunctionDecl *CanonDecl = FD->getCanonicalDecl();
FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
std::make_tuple(CanonDecl),
std::make_tuple(SP));
return SP;
}
llvm::DISubprogram *CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) {
return getFunctionFwdDeclOrStub(GD, /* Stub = */ false);
}
llvm::DISubprogram *CGDebugInfo::getFunctionStub(GlobalDecl GD) {
return getFunctionFwdDeclOrStub(GD, /* Stub = */ true);
}
llvm::DIGlobalVariable *
CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
QualType T;
StringRef Name, LinkageName;
SourceLocation Loc = VD->getLocation();
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
llvm::MDTuple *TemplateParameters = nullptr;
collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, TemplateParameters,
DContext);
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
auto *GV = DBuilder.createTempGlobalVariableFwdDecl(
DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit),
!VD->isExternallyVisible(), nullptr, TemplateParameters, Align);
FwdDeclReplaceMap.emplace_back(
std::piecewise_construct,
std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())),
std::make_tuple(static_cast<llvm::Metadata *>(GV)));
return GV;
}
llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
// We only need a declaration (not a definition) of the type - so use whatever
// we would otherwise do to get a type for a pointee: forward declarations in
// limited debug info, full definitions (if the type definition is available)
// in unlimited debug info.
if (const auto *TD = dyn_cast<TypeDecl>(D))
return getOrCreateType(CGM.getContext().getTypeDeclType(TD),
getOrCreateFile(TD->getLocation()));
auto I = DeclCache.find(D->getCanonicalDecl());
if (I != DeclCache.end()) {
auto N = I->second;
if (auto *GVE = dyn_cast_or_null<llvm::DIGlobalVariableExpression>(N))
return GVE->getVariable();
return dyn_cast_or_null<llvm::DINode>(N);
}
// No definition for now. Emit a forward declaration that might be
// merged with a potential upcoming definition.
if (const auto *FD = dyn_cast<FunctionDecl>(D))
return getFunctionForwardDeclaration(FD);
else if (const auto *VD = dyn_cast<VarDecl>(D))
return getGlobalVariableForwardDeclaration(VD);
return nullptr;
}
llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) {
if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly)
return nullptr;
const auto *FD = dyn_cast<FunctionDecl>(D);
if (!FD)
return nullptr;
// Set up the context.
auto *S = getDeclContextDescriptor(D);
auto MI = SPCache.find(FD->getCanonicalDecl());
if (MI == SPCache.end()) {
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) {
return CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()),
cast<llvm::DICompositeType>(S));
}
}
if (MI != SPCache.end()) {
auto *SP = dyn_cast_or_null<llvm::DISubprogram>(MI->second);
if (SP && !SP->isDefinition())
return SP;
}
for (auto NextFD : FD->redecls()) {
auto MI = SPCache.find(NextFD->getCanonicalDecl());
if (MI != SPCache.end()) {
auto *SP = dyn_cast_or_null<llvm::DISubprogram>(MI->second);
if (SP && !SP->isDefinition())
return SP;
}
}
return nullptr;
}
// getOrCreateFunctionType - Construct the type. If it is a C++ method,
// include the implicit parameter "this".
llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
QualType FnType,
llvm::DIFile *F) {
if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly)
// Create a fake but valid subroutine type. Otherwise -verify would fail, and
// the subprogram DIE would be missing DW_AT_decl_file and DW_AT_decl_line.
return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None));
if (const auto *Method = dyn_cast<CXXMethodDecl>(D))
return getOrCreateMethodType(Method, F);
const auto *FTy = FnType->getAs<FunctionType>();
CallingConv CC = FTy ? FTy->getCallConv() : CallingConv::CC_C;
if (const auto *OMethod = dyn_cast<ObjCMethodDecl>(D)) {
// Add "self" and "_cmd"
SmallVector<llvm::Metadata *, 16> Elts;
// First element is always return type. For 'void' functions it is NULL.
QualType ResultTy = OMethod->getReturnType();
// Replace the instancetype keyword with the actual type.
if (ResultTy == CGM.getContext().getObjCInstanceType())
ResultTy = CGM.getContext().getPointerType(
QualType(OMethod->getClassInterface()->getTypeForDecl(), 0));
Elts.push_back(getOrCreateType(ResultTy, F));
// "self" pointer is always first argument.
QualType SelfDeclTy;
if (auto *SelfDecl = OMethod->getSelfDecl())
SelfDeclTy = SelfDecl->getType();
else if (auto *FPT = dyn_cast<FunctionProtoType>(FnType))
if (FPT->getNumParams() > 1)
SelfDeclTy = FPT->getParamType(0);
if (!SelfDeclTy.isNull())
Elts.push_back(
CreateSelfType(SelfDeclTy, getOrCreateType(SelfDeclTy, F)));
// "_cmd" pointer is always second argument.
Elts.push_back(DBuilder.createArtificialType(
getOrCreateType(CGM.getContext().getObjCSelType(), F)));
// Get rest of the arguments.
for (const auto *PI : OMethod->parameters())
Elts.push_back(getOrCreateType(PI->getType(), F));
// Variadic methods need a special marker at the end of the type list.
if (OMethod->isVariadic())
Elts.push_back(DBuilder.createUnspecifiedParameter());
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
return DBuilder.createSubroutineType(EltTypeArray, llvm::DINode::FlagZero,
getDwarfCC(CC));
}
// Handle variadic function types; they need an additional
// unspecified parameter.
if (const auto *FD = dyn_cast<FunctionDecl>(D))
if (FD->isVariadic()) {
SmallVector<llvm::Metadata *, 16> EltTys;
EltTys.push_back(getOrCreateType(FD->getReturnType(), F));
if (const auto *FPT = dyn_cast<FunctionProtoType>(FnType))
for (QualType ParamType : FPT->param_types())
EltTys.push_back(getOrCreateType(ParamType, F));
EltTys.push_back(DBuilder.createUnspecifiedParameter());
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
return DBuilder.createSubroutineType(EltTypeArray, llvm::DINode::FlagZero,
getDwarfCC(CC));
}
return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F));
}
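// Illustrative sketch (editorial example): a variadic C function such as
//
//   int printf(const char *fmt, ...);
//
// yields the element list [int, const char *, <unspecified parameters>],
// while an Objective-C method additionally gets artificial 'self' and
// '_cmd' entries inserted right after the return type.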
void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
SourceLocation ScopeLoc, QualType FnType,
llvm::Function *Fn, bool CurFuncIsThunk,
CGBuilderTy &Builder) {
StringRef Name;
StringRef LinkageName;
FnBeginRegionCount.push_back(LexicalBlockStack.size());
const Decl *D = GD.getDecl();
bool HasDecl = (D != nullptr);
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *FDContext = Unit;
llvm::DINodeArray TParamsArray;
if (!HasDecl) {
// Use llvm function name.
LinkageName = Fn->getName();
} else if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// If there is a subprogram for this function available then use it.
auto FI = SPCache.find(FD->getCanonicalDecl());
if (FI != SPCache.end()) {
auto *SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second);
if (SP && SP->isDefinition()) {
LexicalBlockStack.emplace_back(SP);
RegionMap[D].reset(SP);
return;
}
}
collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext,
TParamsArray, Flags);
} else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) {
Name = getObjCMethodName(OMD);
Flags |= llvm::DINode::FlagPrototyped;
} else {
// Use llvm function name.
Name = Fn->getName();
Flags |= llvm::DINode::FlagPrototyped;
}
if (Name.startswith("\01"))
Name = Name.substr(1);
if (!HasDecl || D->isImplicit() || D->hasAttr<ArtificialAttr>()) {
Flags |= llvm::DINode::FlagArtificial;
// Artificial functions should not silently reuse CurLoc.
CurLoc = SourceLocation();
}
if (CurFuncIsThunk)
Flags |= llvm::DINode::FlagThunk;
if (Fn->hasLocalLinkage())
SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
if (CGM.getLangOpts().Optimize)
SPFlags |= llvm::DISubprogram::SPFlagOptimized;
llvm::DINode::DIFlags FlagsForDef = Flags | getCallSiteRelatedAttrs();
llvm::DISubprogram::DISPFlags SPFlagsForDef =
SPFlags | llvm::DISubprogram::SPFlagDefinition;
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = getLineNumber(ScopeLoc);
// FIXME: The function declaration we're constructing here is mostly reusing
// declarations from CXXMethodDecl and not constructing new ones for arbitrary
// FunctionDecls. When/if we fix this we can have FDContext be TheCU/null for
// all subprograms instead of the actual context since subprogram definitions
// are emitted as CU level entities by the backend.
llvm::DISubprogram *SP = DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
getOrCreateFunctionType(D, FnType, Unit), ScopeLine, FlagsForDef,
SPFlagsForDef, TParamsArray.get(), getFunctionDeclaration(D));
Fn->setSubprogram(SP);
// We might get here with a VarDecl in the case where we're generating
// code for the initialization of globals. Do not record these decls,
// as they would overwrite the actual VarDecl Decl in the cache.
if (HasDecl && isa<FunctionDecl>(D))
DeclCache[D->getCanonicalDecl()].reset(SP);
if (CGM.getCodeGenOpts().DwarfVersion >= 5) {
// Starting with DWARF V5, method declarations are emitted as children of
// the interface type.
if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) {
const ObjCInterfaceDecl *ID = OMD->getClassInterface();
QualType QTy(ID->getTypeForDecl(), 0);
auto It = TypeCache.find(QTy.getAsOpaquePtr());
if (It != TypeCache.end()) {
llvm::DICompositeType *InterfaceDecl =
cast<llvm::DICompositeType>(It->second);
llvm::DISubprogram *FD = DBuilder.createFunction(
InterfaceDecl, Name, LinkageName, Unit, LineNo,
getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
TParamsArray.get());
DBuilder.finalizeSubprogram(FD);
ObjCMethodCache[ID].push_back(FD);
}
}
}
// Push the function onto the lexical block stack.
LexicalBlockStack.emplace_back(SP);
if (HasDecl)
RegionMap[D].reset(SP);
}
void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
QualType FnType) {
StringRef Name;
StringRef LinkageName;
const Decl *D = GD.getDecl();
if (!D)
return;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *FDContext = getDeclContextDescriptor(D);
llvm::DINodeArray TParamsArray;
if (isa<FunctionDecl>(D)) {
// If there is a DISubprogram for this function available then use it.
collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext,
TParamsArray, Flags);
} else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) {
Name = getObjCMethodName(OMD);
Flags |= llvm::DINode::FlagPrototyped;
} else {
llvm_unreachable("not a function or ObjC method");
}
if (!Name.empty() && Name[0] == '\01')
Name = Name.substr(1);
if (D->isImplicit()) {
Flags |= llvm::DINode::FlagArtificial;
// Artificial functions without a location should not silently reuse CurLoc.
if (Loc.isInvalid())
CurLoc = SourceLocation();
}
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = 0;
llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
if (CGM.getLangOpts().Optimize)
SPFlags |= llvm::DISubprogram::SPFlagOptimized;
DBuilder.retainType(DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(D)));
}
void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) {
const auto *FD = cast<FunctionDecl>(GD.getDecl());
// If there is a subprogram for this function available then use it.
auto FI = SPCache.find(FD->getCanonicalDecl());
llvm::DISubprogram *SP = nullptr;
if (FI != SPCache.end())
SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second);
if (!SP || !SP->isDefinition())
SP = getFunctionStub(GD);
FnBeginRegionCount.push_back(LexicalBlockStack.size());
LexicalBlockStack.emplace_back(SP);
setInlinedAt(Builder.getCurrentDebugLocation());
EmitLocation(Builder, FD->getLocation());
}
void CGDebugInfo::EmitInlineFunctionEnd(CGBuilderTy &Builder) {
assert(CurInlinedAt && "unbalanced inline scope stack");
EmitFunctionEnd(Builder, nullptr);
setInlinedAt(llvm::DebugLoc(CurInlinedAt).getInlinedAt());
}
void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
// Update our current location
setLocation(Loc);
if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty())
return;
llvm::MDNode *Scope = LexicalBlockStack.back();
Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope, CurInlinedAt));
}
void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
llvm::MDNode *Back = nullptr;
if (!LexicalBlockStack.empty())
Back = LexicalBlockStack.back().get();
LexicalBlockStack.emplace_back(DBuilder.createLexicalBlock(
cast<llvm::DIScope>(Back), getOrCreateFile(CurLoc), getLineNumber(CurLoc),
getColumnNumber(CurLoc)));
}
void CGDebugInfo::AppendAddressSpaceXDeref(
unsigned AddressSpace, SmallVectorImpl<int64_t> &Expr) const {
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(AddressSpace);
if (!DWARFAddressSpace)
return;
Expr.push_back(llvm::dwarf::DW_OP_constu);
Expr.push_back(DWARFAddressSpace.getValue());
Expr.push_back(llvm::dwarf::DW_OP_swap);
Expr.push_back(llvm::dwarf::DW_OP_xderef);
}
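// Illustrative sketch (editorial example): if the target maps the variable's
// address space to DWARF address space 1, the fragment appended here is
//
//   DW_OP_constu 1, DW_OP_swap, DW_OP_xderef
//
// so that the location is dereferenced through that address space instead
// of the default one.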
void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder,
SourceLocation Loc) {
// Set our current location.
setLocation(Loc);
// Emit a line table change for the current location inside the new scope.
Builder.SetCurrentDebugLocation(
llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc),
LexicalBlockStack.back(), CurInlinedAt));
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
// Create a new lexical block and push it on the stack.
CreateLexicalBlock(Loc);
}
void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder,
SourceLocation Loc) {
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
// Provide an entry in the line table for the end of the block.
EmitLocation(Builder, Loc);
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
LexicalBlockStack.pop_back();
}
void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn) {
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
unsigned RCount = FnBeginRegionCount.back();
assert(RCount <= LexicalBlockStack.size() && "Region stack mismatch");
// Pop all regions for this function.
while (LexicalBlockStack.size() != RCount) {
// Provide an entry in the line table for the end of the block.
EmitLocation(Builder, CurLoc);
LexicalBlockStack.pop_back();
}
FnBeginRegionCount.pop_back();
if (Fn && Fn->getSubprogram())
DBuilder.finalizeSubprogram(Fn->getSubprogram());
}
CGDebugInfo::BlockByRefType
CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
uint64_t *XOffset) {
SmallVector<llvm::Metadata *, 5> EltTys;
QualType FType;
uint64_t FieldSize, FieldOffset;
uint32_t FieldAlign;
llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
QualType Type = VD->getType();
FieldOffset = 0;
FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
EltTys.push_back(CreateMemberType(Unit, FType, "__isa", &FieldOffset));
EltTys.push_back(CreateMemberType(Unit, FType, "__forwarding", &FieldOffset));
FType = CGM.getContext().IntTy;
EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset));
EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset));
bool HasCopyAndDispose = CGM.getContext().BlockRequiresCopying(Type, VD);
if (HasCopyAndDispose) {
FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
EltTys.push_back(
CreateMemberType(Unit, FType, "__copy_helper", &FieldOffset));
EltTys.push_back(
CreateMemberType(Unit, FType, "__destroy_helper", &FieldOffset));
}
bool HasByrefExtendedLayout;
Qualifiers::ObjCLifetime Lifetime;
if (CGM.getContext().getByrefLifetime(Type, Lifetime,
HasByrefExtendedLayout) &&
HasByrefExtendedLayout) {
FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
EltTys.push_back(
CreateMemberType(Unit, FType, "__byref_variable_layout", &FieldOffset));
}
CharUnits Align = CGM.getContext().getDeclAlign(VD);
if (Align > CGM.getContext().toCharUnitsFromBits(
CGM.getTarget().getPointerAlign(0))) {
CharUnits FieldOffsetInBytes =
CGM.getContext().toCharUnitsFromBits(FieldOffset);
CharUnits AlignedOffsetInBytes = FieldOffsetInBytes.alignTo(Align);
CharUnits NumPaddingBytes = AlignedOffsetInBytes - FieldOffsetInBytes;
if (NumPaddingBytes.isPositive()) {
llvm::APInt pad(32, NumPaddingBytes.getQuantity());
FType = CGM.getContext().getConstantArrayType(CGM.getContext().CharTy,
pad, ArrayType::Normal, 0);
EltTys.push_back(CreateMemberType(Unit, FType, "", &FieldOffset));
}
}
FType = Type;
llvm::DIType *WrappedTy = getOrCreateType(FType, Unit);
FieldSize = CGM.getContext().getTypeSize(FType);
FieldAlign = CGM.getContext().toBits(Align);
*XOffset = FieldOffset;
llvm::DIType *FieldTy = DBuilder.createMemberType(
Unit, VD->getName(), Unit, 0, FieldSize, FieldAlign, FieldOffset,
llvm::DINode::FlagZero, WrappedTy);
EltTys.push_back(FieldTy);
FieldOffset += FieldSize;
llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
return {DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0,
llvm::DINode::FlagZero, nullptr, Elements),
WrappedTy};
}
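// Illustrative sketch (editorial example, assuming a 64-bit target): a
// '__block int x' whose type needs no copy/dispose helpers is wrapped in a
// struct laid out as
//
//   { void *__isa; void *__forwarding; int __flags; int __size; int x; }
//
// with *XOffset reporting the bit offset of 'x' within that wrapper.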
llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
llvm::Value *Storage,
llvm::Optional<unsigned> ArgNo,
CGBuilderTy &Builder) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
if (VD->hasAttr<NoDebugAttr>())
return nullptr;
bool Unwritten =
VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) &&
cast<Decl>(VD->getDeclContext())->isImplicit());
llvm::DIFile *Unit = nullptr;
if (!Unwritten)
Unit = getOrCreateFile(VD->getLocation());
llvm::DIType *Ty;
uint64_t XOffset = 0;
if (VD->hasAttr<BlocksAttr>())
Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType;
else
Ty = getOrCreateType(VD->getType(), Unit);
// If there is no debug info for this type then do not emit debug info
// for this variable.
if (!Ty)
return nullptr;
// Get location information.
unsigned Line = 0;
unsigned Column = 0;
if (!Unwritten) {
Line = getLineNumber(VD->getLocation());
Column = getColumnNumber(VD->getLocation());
}
SmallVector<int64_t, 13> Expr;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
if (VD->isImplicit())
Flags |= llvm::DINode::FlagArtificial;
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType());
AppendAddressSpaceXDeref(AddressSpace, Expr);
// If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an
// object pointer flag.
if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) {
if (IPD->getParameterKind() == ImplicitParamDecl::CXXThis ||
IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf)
Flags |= llvm::DINode::FlagObjectPointer;
}
// Note: Older versions of clang used to emit byval references with an extra
// DW_OP_deref, because they referenced the IR arg directly instead of
// referencing an alloca. Newer versions of LLVM don't treat allocas
// differently from other function arguments when used in a dbg.declare.
auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
StringRef Name = VD->getName();
if (!Name.empty()) {
if (VD->hasAttr<BlocksAttr>()) {
// Here, we need an offset *into* the alloca.
CharUnits offset = CharUnits::fromQuantity(32);
Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of __forwarding field
offset = CGM.getContext().toCharUnitsFromBits(
CGM.getTarget().getPointerWidth(0));
Expr.push_back(offset.getQuantity());
Expr.push_back(llvm::dwarf::DW_OP_deref);
Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of x field
offset = CGM.getContext().toCharUnitsFromBits(XOffset);
Expr.push_back(offset.getQuantity());
}
} else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) {
// If VD is an anonymous union then Storage represents the value for
// all union fields.
const RecordDecl *RD = RT->getDecl();
if (RD->isUnion() && RD->isAnonymousStructOrUnion()) {
// GDB has trouble finding local variables in anonymous unions, so we emit
// artificial local variables for each of the members.
//
// FIXME: Remove this code as soon as GDB supports this.
// The debug info verifier in LLVM operates based on the assumption that a
// variable has the same size as its storage, so that check had to be
// disabled for artificial variables.
for (const auto *Field : RD->fields()) {
llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit);
StringRef FieldName = Field->getName();
// Ignore unnamed fields. Do not ignore unnamed records.
if (FieldName.empty() && !isa<RecordType>(Field->getType()))
continue;
// Use VarDecl's Tag, Scope and Line number.
auto FieldAlign = getDeclAlignIfRequired(Field, CGM.getContext());
auto *D = DBuilder.createAutoVariable(
Scope, FieldName, Unit, Line, FieldTy, CGM.getLangOpts().Optimize,
Flags | llvm::DINode::FlagArtificial, FieldAlign);
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(
Storage, D, DBuilder.createExpression(Expr),
llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
Builder.GetInsertBlock());
}
}
}
// Create the descriptor for the variable.
auto *D = ArgNo ? DBuilder.createParameterVariable(
Scope, Name, *ArgNo, Unit, Line, Ty,
CGM.getLangOpts().Optimize, Flags)
: DBuilder.createAutoVariable(Scope, Name, Unit, Line, Ty,
CGM.getLangOpts().Optimize,
Flags, Align);
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
Builder.GetInsertBlock());
return D;
}
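// Illustrative sketch (editorial example, assuming a 64-bit target): for the
// '__block int x' wrapper above, 'x' sits 24 bytes into the byref struct, so
// the location expression built here is
//
//   DW_OP_plus_uconst 8, DW_OP_deref, DW_OP_plus_uconst 24
//
// i.e. step to __forwarding, chase it, then step to the field itself.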
llvm::DILocalVariable *
CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage,
CGBuilderTy &Builder) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
return EmitDeclare(VD, Storage, llvm::None, Builder);
}
llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy,
llvm::DIType *Ty) {
llvm::DIType *CachedTy = getTypeOrNull(QualTy);
if (CachedTy)
Ty = CachedTy;
return DBuilder.createObjectPointerType(Ty);
}
void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder,
const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
if (Builder.GetInsertBlock() == nullptr)
return;
if (VD->hasAttr<NoDebugAttr>())
return;
bool isByRef = VD->hasAttr<BlocksAttr>();
uint64_t XOffset = 0;
llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
llvm::DIType *Ty;
if (isByRef)
Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType;
else
Ty = getOrCreateType(VD->getType(), Unit);
// Self is passed along as an implicit non-arg variable in a
// block. Mark it as the object pointer.
if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD))
if (IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf)
Ty = CreateSelfType(VD->getType(), Ty);
// Get location information.
unsigned Line = getLineNumber(VD->getLocation());
unsigned Column = getColumnNumber(VD->getLocation());
const llvm::DataLayout &target = CGM.getDataLayout();
CharUnits offset = CharUnits::fromQuantity(
target.getStructLayout(blockInfo.StructureType)
->getElementOffset(blockInfo.getCapture(VD).getIndex()));
SmallVector<int64_t, 9> addr;
addr.push_back(llvm::dwarf::DW_OP_deref);
addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
addr.push_back(offset.getQuantity());
if (isByRef) {
addr.push_back(llvm::dwarf::DW_OP_deref);
addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of __forwarding field
offset =
CGM.getContext().toCharUnitsFromBits(target.getPointerSizeInBits(0));
addr.push_back(offset.getQuantity());
addr.push_back(llvm::dwarf::DW_OP_deref);
addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of x field
offset = CGM.getContext().toCharUnitsFromBits(XOffset);
addr.push_back(offset.getQuantity());
}
// Create the descriptor for the variable.
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
auto *D = DBuilder.createAutoVariable(
cast<llvm::DILocalScope>(LexicalBlockStack.back()), VD->getName(), Unit,
Line, Ty, false, llvm::DINode::FlagZero, Align);
// Insert an llvm.dbg.declare into the current block.
auto DL =
llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back(), CurInlinedAt);
auto *Expr = DBuilder.createExpression(addr);
if (InsertPoint)
DBuilder.insertDeclare(Storage, D, Expr, DL, InsertPoint);
else
DBuilder.insertDeclare(Storage, D, Expr, DL, Builder.GetInsertBlock());
}
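// Illustrative sketch (editorial example): a variable captured by value at
// byte offset 32 of the block literal is located by
//
//   DW_OP_deref, DW_OP_plus_uconst 32
//
// starting from the block pointer; a __block capture then chases the
// __forwarding pointer exactly as in EmitDeclare above.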
void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI,
unsigned ArgNo,
CGBuilderTy &Builder) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
EmitDeclare(VD, AI, ArgNo, Builder);
}
namespace {
struct BlockLayoutChunk {
uint64_t OffsetInBits;
const BlockDecl::Capture *Capture;
};
bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) {
return l.OffsetInBits < r.OffsetInBits;
}
} // namespace
void CGDebugInfo::collectDefaultFieldsForBlockLiteralDeclare(
const CGBlockInfo &Block, const ASTContext &Context, SourceLocation Loc,
const llvm::StructLayout &BlockLayout, llvm::DIFile *Unit,
SmallVectorImpl<llvm::Metadata *> &Fields) {
// Blocks in OpenCL have unique constraints which make the standard fields
// redundant while requiring size and align fields for enqueue_kernel. See
// initializeForBlockHeader in CGBlocks.cpp.
if (CGM.getLangOpts().OpenCL) {
Fields.push_back(createFieldType("__size", Context.IntTy, Loc, AS_public,
BlockLayout.getElementOffsetInBits(0),
Unit, Unit));
Fields.push_back(createFieldType("__align", Context.IntTy, Loc, AS_public,
BlockLayout.getElementOffsetInBits(1),
Unit, Unit));
} else {
Fields.push_back(createFieldType("__isa", Context.VoidPtrTy, Loc, AS_public,
BlockLayout.getElementOffsetInBits(0),
Unit, Unit));
Fields.push_back(createFieldType("__flags", Context.IntTy, Loc, AS_public,
BlockLayout.getElementOffsetInBits(1),
Unit, Unit));
Fields.push_back(
createFieldType("__reserved", Context.IntTy, Loc, AS_public,
BlockLayout.getElementOffsetInBits(2), Unit, Unit));
auto *FnTy = Block.getBlockExpr()->getFunctionType();
auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar());
Fields.push_back(createFieldType("__FuncPtr", FnPtrType, Loc, AS_public,
BlockLayout.getElementOffsetInBits(3),
Unit, Unit));
Fields.push_back(createFieldType(
"__descriptor",
Context.getPointerType(Block.NeedsCopyDispose
? Context.getBlockDescriptorExtendedType()
: Context.getBlockDescriptorType()),
Loc, AS_public, BlockLayout.getElementOffsetInBits(4), Unit, Unit));
}
}
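// Illustrative sketch (editorial example): outside OpenCL the synthesized
// block-literal type therefore begins with the standard header
//
//   { void *__isa; int __flags; int __reserved;
//     ReturnTy (*__FuncPtr)(...); struct __block_descriptor *__descriptor; }
//
// followed by the captures, whereas OpenCL blocks carry only '__size' and
// '__align'.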
void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
StringRef Name,
unsigned ArgNo,
llvm::AllocaInst *Alloca,
CGBuilderTy &Builder) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
ASTContext &C = CGM.getContext();
const BlockDecl *blockDecl = block.getBlockDecl();
// Collect some general information about the block's location.
SourceLocation loc = blockDecl->getCaretLocation();
llvm::DIFile *tunit = getOrCreateFile(loc);
unsigned line = getLineNumber(loc);
unsigned column = getColumnNumber(loc);
// Build the debug-info type for the block literal.
getDeclContextDescriptor(blockDecl);
const llvm::StructLayout *blockLayout =
CGM.getDataLayout().getStructLayout(block.StructureType);
SmallVector<llvm::Metadata *, 16> fields;
collectDefaultFieldsForBlockLiteralDeclare(block, C, loc, *blockLayout, tunit,
fields);
// We want to sort the captures by offset, not because DWARF
// requires this, but because we're paranoid about debuggers.
SmallVector<BlockLayoutChunk, 8> chunks;
// 'this' capture.
if (blockDecl->capturesCXXThis()) {
BlockLayoutChunk chunk;
chunk.OffsetInBits =
blockLayout->getElementOffsetInBits(block.CXXThisIndex);
chunk.Capture = nullptr;
chunks.push_back(chunk);
}
// Variable captures.
for (const auto &capture : blockDecl->captures()) {
const VarDecl *variable = capture.getVariable();
const CGBlockInfo::Capture &captureInfo = block.getCapture(variable);
// Ignore constant captures.
if (captureInfo.isConstant())
continue;
BlockLayoutChunk chunk;
chunk.OffsetInBits =
blockLayout->getElementOffsetInBits(captureInfo.getIndex());
chunk.Capture = &capture;
chunks.push_back(chunk);
}
// Sort by offset.
llvm::array_pod_sort(chunks.begin(), chunks.end());
for (const BlockLayoutChunk &Chunk : chunks) {
uint64_t offsetInBits = Chunk.OffsetInBits;
const BlockDecl::Capture *capture = Chunk.Capture;
// If we have a null capture, this must be the C++ 'this' capture.
if (!capture) {
QualType type;
if (auto *Method =
cast_or_null<CXXMethodDecl>(blockDecl->getNonClosureContext()))
type = Method->getThisType();
else if (auto *RDecl = dyn_cast<CXXRecordDecl>(blockDecl->getParent()))
type = QualType(RDecl->getTypeForDecl(), 0);
else
llvm_unreachable("unexpected block declcontext");
fields.push_back(createFieldType("this", type, loc, AS_public,
offsetInBits, tunit, tunit));
continue;
}
const VarDecl *variable = capture->getVariable();
StringRef name = variable->getName();
llvm::DIType *fieldType;
if (capture->isByRef()) {
TypeInfo PtrInfo = C.getTypeInfo(C.VoidPtrTy);
auto Align = PtrInfo.AlignIsRequired ? PtrInfo.Align : 0;
// FIXME: This recomputes the layout of the BlockByRefWrapper.
uint64_t xoffset;
fieldType =
EmitTypeForVarWithBlocksAttr(variable, &xoffset).BlockByRefWrapper;
fieldType = DBuilder.createPointerType(fieldType, PtrInfo.Width);
fieldType = DBuilder.createMemberType(tunit, name, tunit, line,
PtrInfo.Width, Align, offsetInBits,
llvm::DINode::FlagZero, fieldType);
} else {
auto Align = getDeclAlignIfRequired(variable, CGM.getContext());
fieldType = createFieldType(name, variable->getType(), loc, AS_public,
offsetInBits, Align, tunit, tunit);
}
fields.push_back(fieldType);
}
SmallString<36> typeName;
llvm::raw_svector_ostream(typeName)
<< "__block_literal_" << CGM.getUniqueBlockCount();
llvm::DINodeArray fieldsArray = DBuilder.getOrCreateArray(fields);
llvm::DIType *type =
DBuilder.createStructType(tunit, typeName.str(), tunit, line,
CGM.getContext().toBits(block.BlockSize), 0,
llvm::DINode::FlagZero, nullptr, fieldsArray);
type = DBuilder.createPointerType(type, CGM.PointerWidthInBits);
// Get overall information about the block.
llvm::DINode::DIFlags flags = llvm::DINode::FlagArtificial;
auto *scope = cast<llvm::DILocalScope>(LexicalBlockStack.back());
// Create the descriptor for the parameter.
auto *debugVar = DBuilder.createParameterVariable(
scope, Name, ArgNo, tunit, line, type, CGM.getLangOpts().Optimize, flags);
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Alloca, debugVar, DBuilder.createExpression(),
llvm::DebugLoc::get(line, column, scope, CurInlinedAt),
Builder.GetInsertBlock());
}
llvm::DIDerivedType *
CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) {
if (!D->isStaticDataMember())
return nullptr;
auto MI = StaticDataMemberCache.find(D->getCanonicalDecl());
if (MI != StaticDataMemberCache.end()) {
assert(MI->second && "Static data member declaration should still exist");
return MI->second;
}
// If the member wasn't found in the cache, lazily construct and add it to the
// type (used when a limited form of the type is emitted).
auto DC = D->getDeclContext();
auto *Ctxt = cast<llvm::DICompositeType>(getDeclContextDescriptor(D));
return CreateRecordStaticField(D, Ctxt, cast<RecordDecl>(DC));
}
llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls(
const RecordDecl *RD, llvm::DIFile *Unit, unsigned LineNo,
StringRef LinkageName, llvm::GlobalVariable *Var, llvm::DIScope *DContext) {
llvm::DIGlobalVariableExpression *GVE = nullptr;
for (const auto *Field : RD->fields()) {
llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit);
StringRef FieldName = Field->getName();
// Ignore unnamed fields, but recurse into anonymous records.
if (FieldName.empty()) {
if (const auto *RT = dyn_cast<RecordType>(Field->getType()))
GVE = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName,
Var, DContext);
continue;
}
// Use VarDecl's Tag, Scope and Line number.
GVE = DBuilder.createGlobalVariableExpression(
DContext, FieldName, LinkageName, Unit, LineNo, FieldTy,
Var->hasLocalLinkage());
Var->addDebugInfo(GVE);
}
return GVE;
}
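// Illustrative sketch (editorial example): for a file-scope anonymous union
//
//   static union { int i; float f; };
//
// this emits one global variable expression per named member ('i' and 'f'),
// each attached to the single llvm::GlobalVariable backing the union.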
void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
const VarDecl *D) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
if (D->hasAttr<NoDebugAttr>())
return;
// If we already created a DIGlobalVariable for this declaration, just attach
// it to the llvm::GlobalVariable.
auto Cached = DeclCache.find(D->getCanonicalDecl());
if (Cached != DeclCache.end())
return Var->addDebugInfo(
cast<llvm::DIGlobalVariableExpression>(Cached->second));
// Create global variable debug descriptor.
llvm::DIFile *Unit = nullptr;
llvm::DIScope *DContext = nullptr;
unsigned LineNo;
StringRef DeclName, LinkageName;
QualType T;
llvm::MDTuple *TemplateParameters = nullptr;
collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName,
TemplateParameters, DContext);
// Attempt to store one global variable for the declaration - even if we
// emit a lot of fields.
llvm::DIGlobalVariableExpression *GVE = nullptr;
// If this is an anonymous union then we'll want to emit a global
// variable for each of its members so that it's possible to find the
// name of any field in the union.
if (T->isUnionType() && DeclName.empty()) {
const RecordDecl *RD = T->castAs<RecordType>()->getDecl();
assert(RD->isAnonymousStructOrUnion() &&
"unnamed non-anonymous struct or union?");
GVE = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext);
} else {
auto Align = getDeclAlignIfRequired(D, CGM.getContext());
SmallVector<int64_t, 4> Expr;
unsigned AddressSpace =
CGM.getContext().getTargetAddressSpace(D->getType());
AppendAddressSpaceXDeref(AddressSpace, Expr);
GVE = DBuilder.createGlobalVariableExpression(
DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
Var->hasLocalLinkage(),
Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters,
Align);
Var->addDebugInfo(GVE);
}
DeclCache[D->getCanonicalDecl()].reset(GVE);
}
void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
if (VD->hasAttr<NoDebugAttr>())
return;
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
// Create the descriptor for the variable.
llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
StringRef Name = VD->getName();
llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit);
if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) {
const auto *ED = cast<EnumDecl>(ECD->getDeclContext());
assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?");
Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit);
}
// Do not use global variables for enums.
//
// FIXME: why not?
if (Ty->getTag() == llvm::dwarf::DW_TAG_enumeration_type)
return;
// Do not emit separate definitions for function local const/statics.
if (isa<FunctionDecl>(VD->getDeclContext()))
return;
VD = cast<ValueDecl>(VD->getCanonicalDecl());
auto *VarD = cast<VarDecl>(VD);
if (VarD->isStaticDataMember()) {
auto *RD = cast<RecordDecl>(VarD->getDeclContext());
getDeclContextDescriptor(VarD);
// Ensure that the type is retained even though it's otherwise unreferenced.
//
// FIXME: This is probably unnecessary, since Ty should reference RD
// through its scope.
RetainedTypes.push_back(
CGM.getContext().getRecordType(RD).getAsOpaquePtr());
return;
}
llvm::DIScope *DContext = getDeclContextDescriptor(VD);
auto &GV = DeclCache[VD];
if (GV)
return;
llvm::DIExpression *InitExpr = nullptr;
if (CGM.getContext().getTypeSize(VD->getType()) <= 64) {
// FIXME: Add a representation for integer constants wider than 64 bits.
if (Init.isInt())
InitExpr =
DBuilder.createConstantValueExpression(Init.getInt().getExtValue());
else if (Init.isFloat())
InitExpr = DBuilder.createConstantValueExpression(
Init.getFloat().bitcastToAPInt().getZExtValue());
}
llvm::MDTuple *TemplateParameters = nullptr;
if (isa<VarTemplateSpecializationDecl>(VD))
if (VarD) {
llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VarD, &*Unit);
TemplateParameters = parameterNodes.get();
}
GV.reset(DBuilder.createGlobalVariableExpression(
DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty,
true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD),
TemplateParameters, Align));
}
llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
if (!LexicalBlockStack.empty())
return LexicalBlockStack.back();
llvm::DIScope *Mod = getParentModuleOrNull(D);
return getContextDescriptor(D, Mod ? Mod : TheCU);
}
void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) {
if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
const NamespaceDecl *NSDecl = UD.getNominatedNamespace();
if (!NSDecl->isAnonymousNamespace() ||
CGM.getCodeGenOpts().DebugExplicitImport) {
auto Loc = UD.getLocation();
DBuilder.createImportedModule(
getCurrentContextDescriptor(cast<Decl>(UD.getDeclContext())),
getOrCreateNamespace(NSDecl), getOrCreateFile(Loc), getLineNumber(Loc));
}
}
void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) {
if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
assert(UD.shadow_size() &&
"We shouldn't be codegening an invalid UsingDecl containing no decls");
// Emitting one decl is sufficient - debuggers can detect that this is an
// overloaded name & provide lookup for all the overloads.
const UsingShadowDecl &USD = **UD.shadow_begin();
// FIXME: Skip functions with undeduced auto return type for now since we
// don't currently have the plumbing for separate declarations & definitions
// of free functions and mismatched types (auto in the declaration, concrete
// return type in the definition)
if (const auto *FD = dyn_cast<FunctionDecl>(USD.getUnderlyingDecl()))
if (const auto *AT =
FD->getType()->getAs<FunctionProtoType>()->getContainedAutoType())
if (AT->getDeducedType().isNull())
return;
if (llvm::DINode *Target =
getDeclarationOrDefinition(USD.getUnderlyingDecl())) {
auto Loc = USD.getLocation();
DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target,
getOrCreateFile(Loc), getLineNumber(Loc));
}
}
void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
if (CGM.getCodeGenOpts().getDebuggerTuning() != llvm::DebuggerKind::LLDB)
return;
if (Module *M = ID.getImportedModule()) {
auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
auto Loc = ID.getLocation();
DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())),
getOrCreateModuleRef(Info, DebugTypeExtRefs), getOrCreateFile(Loc),
getLineNumber(Loc));
}
}
llvm::DIImportedEntity *
CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return nullptr;
auto &VH = NamespaceAliasCache[&NA];
if (VH)
return cast<llvm::DIImportedEntity>(VH);
llvm::DIImportedEntity *R;
auto Loc = NA.getLocation();
if (const auto *Underlying =
dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace()))
// We could cache & dedup here rather than relying on metadata deduping.
R = DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
EmitNamespaceAlias(*Underlying), getOrCreateFile(Loc),
getLineNumber(Loc), NA.getName());
else
R = DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
getOrCreateNamespace(cast<NamespaceDecl>(NA.getAliasedNamespace())),
getOrCreateFile(Loc), getLineNumber(Loc), NA.getName());
VH.reset(R);
return R;
}
llvm::DINamespace *
CGDebugInfo::getOrCreateNamespace(const NamespaceDecl *NSDecl) {
// Don't canonicalize the NamespaceDecl here: The DINamespace will be uniqued
// if necessary, and this way multiple declarations of the same namespace in
// different parent modules stay distinct.
auto I = NamespaceCache.find(NSDecl);
if (I != NamespaceCache.end())
return cast<llvm::DINamespace>(I->second);
llvm::DIScope *Context = getDeclContextDescriptor(NSDecl);
// Don't trust the context if it is a DIModule (see comment above).
llvm::DINamespace *NS =
DBuilder.createNameSpace(Context, NSDecl->getName(), NSDecl->isInline());
NamespaceCache[NSDecl].reset(NS);
return NS;
}
void CGDebugInfo::setDwoId(uint64_t Signature) {
assert(TheCU && "no main compile unit");
TheCU->setDWOId(Signature);
}
void CGDebugInfo::finalize() {
// Creating types might create further types - invalidating the current
// element and the size(), so don't cache/reference them.
for (size_t i = 0; i != ObjCInterfaceCache.size(); ++i) {
ObjCInterfaceCacheEntry E = ObjCInterfaceCache[i];
llvm::DIType *Ty = E.Type->getDecl()->getDefinition()
? CreateTypeDefinition(E.Type, E.Unit)
: E.Decl;
DBuilder.replaceTemporary(llvm::TempDIType(E.Decl), Ty);
}
if (CGM.getCodeGenOpts().DwarfVersion >= 5) {
// Add methods to interface.
for (const auto &P : ObjCMethodCache) {
if (P.second.empty())
continue;
QualType QTy(P.first->getTypeForDecl(), 0);
auto It = TypeCache.find(QTy.getAsOpaquePtr());
assert(It != TypeCache.end());
llvm::DICompositeType *InterfaceDecl =
cast<llvm::DICompositeType>(It->second);
SmallVector<llvm::Metadata *, 16> EltTys;
auto CurrentElts = InterfaceDecl->getElements();
EltTys.append(CurrentElts.begin(), CurrentElts.end());
for (auto &MD : P.second)
EltTys.push_back(MD);
llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
DBuilder.replaceArrays(InterfaceDecl, Elements);
}
}
for (const auto &P : ReplaceMap) {
assert(P.second);
auto *Ty = cast<llvm::DIType>(P.second);
assert(Ty->isForwardDecl());
auto It = TypeCache.find(P.first);
assert(It != TypeCache.end());
assert(It->second);
DBuilder.replaceTemporary(llvm::TempDIType(Ty),
cast<llvm::DIType>(It->second));
}
for (const auto &P : FwdDeclReplaceMap) {
assert(P.second);
llvm::TempMDNode FwdDecl(cast<llvm::MDNode>(P.second));
llvm::Metadata *Repl;
auto It = DeclCache.find(P.first);
// If there has been no definition for the declaration, call RAUW
// with ourselves; that will destroy the temporary MDNode and
// replace it with a standard one, avoiding a memory leak.
if (It == DeclCache.end())
Repl = P.second;
else
Repl = It->second;
if (auto *GVE = dyn_cast_or_null<llvm::DIGlobalVariableExpression>(Repl))
Repl = GVE->getVariable();
DBuilder.replaceTemporary(std::move(FwdDecl), cast<llvm::MDNode>(Repl));
}
// We keep our own list of retained types, because we need to look
// up the final type in the type cache.
for (auto &RT : RetainedTypes)
if (auto MD = TypeCache[RT])
DBuilder.retainType(cast<llvm::DIType>(MD));
DBuilder.finalize();
}
void CGDebugInfo::EmitExplicitCastType(QualType Ty) {
if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
if (auto *DieTy = getOrCreateType(Ty, TheCU->getFile()))
// Don't ignore in case of explicit cast where it is referenced indirectly.
DBuilder.retainType(DieTy);
}
llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) {
if (LexicalBlockStack.empty())
return llvm::DebugLoc();
llvm::MDNode *Scope = LexicalBlockStack.back();
return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope);
}
llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const {
// Call site-related attributes are only useful in optimized programs, and
// when there's a possibility of debugging backtraces.
if (!CGM.getLangOpts().Optimize || DebugKind == codegenoptions::NoDebugInfo ||
DebugKind == codegenoptions::LocTrackingOnly)
return llvm::DINode::FlagZero;
// Call site-related attributes are available in DWARF v5. Some debuggers,
// while not fully DWARF v5-compliant, may accept these attributes as if they
// were part of DWARF v4.
bool SupportsDWARFv4Ext =
CGM.getCodeGenOpts().DwarfVersion == 4 &&
CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB;
if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5)
return llvm::DINode::FlagZero;
return llvm::DINode::FlagAllCallsDescribed;
}
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/PPC.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/PPC.cpp (revision 349792)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/PPC.cpp (revision 349793)
@@ -1,158 +1,158 @@
//===--- PPC.cpp - PPC Helpers for Tools ------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "PPC.h"
#include "ToolChains/CommonArgs.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/ArgList.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
/// getPPCTargetCPU - Get the (LLVM) name of the PowerPC CPU we are targeting.
std::string ppc::getPPCTargetCPU(const ArgList &Args) {
if (Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) {
StringRef CPUName = A->getValue();
if (CPUName == "native") {
std::string CPU = llvm::sys::getHostCPUName();
if (!CPU.empty() && CPU != "generic")
return CPU;
else
return "";
}
return llvm::StringSwitch<const char *>(CPUName)
.Case("common", "generic")
.Case("440", "440")
.Case("440fp", "440")
.Case("450", "450")
.Case("601", "601")
.Case("602", "602")
.Case("603", "603")
.Case("603e", "603e")
.Case("603ev", "603ev")
.Case("604", "604")
.Case("604e", "604e")
.Case("620", "620")
.Case("630", "pwr3")
.Case("G3", "g3")
.Case("7400", "7400")
.Case("G4", "g4")
.Case("7450", "7450")
.Case("G4+", "g4+")
.Case("750", "750")
.Case("970", "970")
.Case("G5", "g5")
.Case("a2", "a2")
.Case("a2q", "a2q")
.Case("e500mc", "e500mc")
.Case("e5500", "e5500")
.Case("power3", "pwr3")
.Case("power4", "pwr4")
.Case("power5", "pwr5")
.Case("power5x", "pwr5x")
.Case("power6", "pwr6")
.Case("power6x", "pwr6x")
.Case("power7", "pwr7")
.Case("power8", "pwr8")
.Case("power9", "pwr9")
.Case("pwr3", "pwr3")
.Case("pwr4", "pwr4")
.Case("pwr5", "pwr5")
.Case("pwr5x", "pwr5x")
.Case("pwr6", "pwr6")
.Case("pwr6x", "pwr6x")
.Case("pwr7", "pwr7")
.Case("pwr8", "pwr8")
.Case("pwr9", "pwr9")
.Case("powerpc", "ppc")
.Case("powerpc64", "ppc64")
.Case("powerpc64le", "ppc64le")
.Default("");
}
return "";
}
const char *ppc::getPPCAsmModeForCPU(StringRef Name) {
return llvm::StringSwitch<const char *>(Name)
.Case("pwr7", "-mpower7")
.Case("power7", "-mpower7")
.Case("pwr8", "-mpower8")
.Case("power8", "-mpower8")
.Case("ppc64le", "-mpower8")
.Case("pwr9", "-mpower9")
.Case("power9", "-mpower9")
.Default("-many");
}
void ppc::getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args,
std::vector<StringRef> &Features) {
handleTargetFeaturesGroup(Args, Features, options::OPT_m_ppc_Features_Group);
ppc::FloatABI FloatABI = ppc::getPPCFloatABI(D, Args);
if (FloatABI == ppc::FloatABI::Soft)
Features.push_back("-hard-float");
ppc::ReadGOTPtrMode ReadGOT = ppc::getPPCReadGOTPtrMode(D, Triple, Args);
if (ReadGOT == ppc::ReadGOTPtrMode::SecurePlt)
Features.push_back("+secure-plt");
}
ppc::ReadGOTPtrMode ppc::getPPCReadGOTPtrMode(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args) {
if (Args.getLastArg(options::OPT_msecure_plt))
return ppc::ReadGOTPtrMode::SecurePlt;
if ((Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 13) ||
- Triple.isOSOpenBSD())
+ Triple.isOSNetBSD() || Triple.isOSOpenBSD())
return ppc::ReadGOTPtrMode::SecurePlt;
else
return ppc::ReadGOTPtrMode::Bss;
}
ppc::FloatABI ppc::getPPCFloatABI(const Driver &D, const ArgList &Args) {
ppc::FloatABI ABI = ppc::FloatABI::Invalid;
if (Arg *A =
Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
options::OPT_mfloat_abi_EQ)) {
if (A->getOption().matches(options::OPT_msoft_float))
ABI = ppc::FloatABI::Soft;
else if (A->getOption().matches(options::OPT_mhard_float))
ABI = ppc::FloatABI::Hard;
else {
ABI = llvm::StringSwitch<ppc::FloatABI>(A->getValue())
.Case("soft", ppc::FloatABI::Soft)
.Case("hard", ppc::FloatABI::Hard)
.Default(ppc::FloatABI::Invalid);
if (ABI == ppc::FloatABI::Invalid && !StringRef(A->getValue()).empty()) {
D.Diag(clang::diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
ABI = ppc::FloatABI::Hard;
}
}
}
// If unspecified, choose the default based on the platform.
if (ABI == ppc::FloatABI::Invalid) {
ABI = ppc::FloatABI::Hard;
}
return ABI;
}
bool ppc::hasPPCAbiArg(const ArgList &Args, const char *Value) {
Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
return A && (A->getValue() == StringRef(Value));
}
Index: head/contrib/llvm/tools/clang
===================================================================
--- head/contrib/llvm/tools/clang (revision 349792)
+++ head/contrib/llvm/tools/clang (revision 349793)
Property changes on: head/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/clang/dist-release_80:r348963-349790
Index: head/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp
===================================================================
--- head/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp (revision 349792)
+++ head/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp (revision 349793)
@@ -1,940 +1,943 @@
//===- PPC64.cpp ----------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
static uint64_t PPC64TocOffset = 0x8000;
static uint64_t DynamicThreadPointerOffset = 0x8000;
// The instruction encodings for the X-form (the extended opcode in bits
// 21-30) and D-form (the primary opcode) instructions that can be used as
// part of the initial-exec TLS sequence.
enum XFormOpcd {
LBZX = 87,
LHZX = 279,
LWZX = 23,
LDX = 21,
STBX = 215,
STHX = 407,
STWX = 151,
STDX = 149,
ADD = 266,
};
enum DFormOpcd {
LBZ = 34,
LBZU = 35,
LHZ = 40,
LHZU = 41,
LHAU = 43,
LWZ = 32,
LWZU = 33,
LFSU = 49,
LD = 58,
LFDU = 51,
STB = 38,
STBU = 39,
STH = 44,
STHU = 45,
STW = 36,
STWU = 37,
STFSU = 53,
STFDU = 55,
STD = 62,
ADDI = 14
};
uint64_t elf::getPPC64TocBase() {
// The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
// TOC starts where the first of these sections starts. We always create a
// .got when we see a relocation that uses it, so for us the start is always
// the .got.
uint64_t TocVA = In.Got->getVA();
// Per the ppc64-elf-linux ABI, the TOC base is the TOC value plus 0x8000,
// thus permitting a full 64 KB segment. Note that the glibc startup
// code (crt1.o) assumes that you can get from the TOC base to the
// start of the .toc section with only a single (signed) 16-bit relocation.
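// For example, if .got starts at 0x10020000 the TOC base is 0x10028000, and
// a signed 16-bit offset from it reaches [0x10020000, 0x1002ffff].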
return TocVA + PPC64TocOffset;
}
unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t StOther) {
// The offset is encoded into the 3 most significant bits of the st_other
// field, with some special values described in section 3.4.1 of the ABI:
// 0 --> Zero offset between the GEP and LEP, and the function does NOT use
// the TOC pointer (r2). r2 will hold the same value on returning from
// the function as it did on entering the function.
// 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a
// caller-saved register for all callers.
// 2-6 --> The binary logarithm of the offset, e.g.:
// 2 --> 2^2 = 4 bytes --> 1 instruction.
// 6 --> 2^6 = 64 bytes --> 16 instructions.
// 7 --> Reserved.
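// For example, st_other = 0x60 gives GepToLep = (0x60 >> 5) & 7 = 3, i.e. a
// 2^3 = 8 byte (two instruction) offset between the GEP and the LEP.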
uint8_t GepToLep = (StOther >> 5) & 7;
if (GepToLep < 2)
return 0;
// The value encoded in the st_other bits is the
// log-base-2(offset).
if (GepToLep < 7)
return 1 << GepToLep;
error("reserved value of 7 in the 3 most-significant-bits of st_other");
return 0;
}
namespace {
class PPC64 final : public TargetInfo {
public:
PPC64();
uint32_t calcEFlags() const override;
RelExpr getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const override;
void writePltHeader(uint8_t *Buf) const override;
void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
int32_t Index, unsigned RelOff) const override;
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void writeGotHeader(uint8_t *Buf) const override;
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const override;
uint32_t getThunkSectionSpacing() const override;
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
uint8_t StOther) const override;
};
} // namespace
// Relocation masks following the #lo(value), #hi(value), #ha(value),
// #higher(value), #highera(value), #highest(value), and #highesta(value)
// macros defined in section 4.5.1 (Relocation Types) of the PPC-elf64abi
// document.
static uint16_t lo(uint64_t V) { return V; }
static uint16_t hi(uint64_t V) { return V >> 16; }
static uint16_t ha(uint64_t V) { return (V + 0x8000) >> 16; }
static uint16_t higher(uint64_t V) { return V >> 32; }
static uint16_t highera(uint64_t V) { return (V + 0x8000) >> 32; }
static uint16_t highest(uint64_t V) { return V >> 48; }
static uint16_t highesta(uint64_t V) { return (V + 0x8000) >> 48; }
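// For example, with V = 0x12348765: lo(V) = 0x8765 sign-extends to -0x789b,
// so ha(V) = 0x1235 rather than hi(V) = 0x1234; an addis with 0x1235 followed
// by an addi/load with 0x8765 reconstructs V exactly.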
// Extracts the 'PO' field of an instruction encoding.
static uint8_t getPrimaryOpCode(uint32_t Encoding) { return (Encoding >> 26); }
static bool isDQFormInstruction(uint32_t Encoding) {
switch (getPrimaryOpCode(Encoding)) {
default:
return false;
case 56:
// The only instruction with a primary opcode of 56 is `lq`.
return true;
case 61:
// There are both DS and DQ instruction forms with this primary opcode.
// Namely `lxv` and `stxv` are the DQ-forms that use it.
// The DS 'XO' bits being set to 01 is restricted to DQ form.
return (Encoding & 3) == 0x1;
}
}
static bool isInstructionUpdateForm(uint32_t Encoding) {
switch (getPrimaryOpCode(Encoding)) {
default:
return false;
case LBZU:
case LHAU:
case LHZU:
case LWZU:
case LFSU:
case LFDU:
case STBU:
case STHU:
case STWU:
case STFSU:
case STFDU:
return true;
// LWA has the same primary opcode as LD; the DS bits are what differentiate
// between LD/LDU/LWA.
case LD:
case STD:
return (Encoding & 3) == 1;
}
}
// There are a number of places where we either want to read or write an
// instruction when handling a half16 relocation type. On big-endian the
// buffer pointer points into the middle of the word we want to extract, and
// on little-endian it points to the start of the word. These two helpers
// simplify reading and writing in that context.
static void writeInstrFromHalf16(uint8_t *Loc, uint32_t Instr) {
write32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0), Instr);
}
static uint32_t readInstrFromHalf16(const uint8_t *Loc) {
return read32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0));
}
PPC64::PPC64() {
GotRel = R_PPC64_GLOB_DAT;
NoneRel = R_PPC64_NONE;
PltRel = R_PPC64_JMP_SLOT;
RelativeRel = R_PPC64_RELATIVE;
IRelativeRel = R_PPC64_IRELATIVE;
GotEntrySize = 8;
PltEntrySize = 4;
GotPltEntrySize = 8;
GotBaseSymInGotPlt = false;
GotBaseSymOff = 0x8000;
GotHeaderEntriesNum = 1;
GotPltHeaderEntriesNum = 2;
PltHeaderSize = 60;
NeedsThunks = true;
TlsModuleIndexRel = R_PPC64_DTPMOD64;
TlsOffsetRel = R_PPC64_DTPREL64;
TlsGotRel = R_PPC64_TPREL64;
NeedsMoreStackNonSplit = false;
// We need 64K pages (at least under glibc/Linux, where the loader won't
// set different permissions at a finer granularity than that).
DefaultMaxPageSize = 65536;
// The PPC64 ELF ABI v1 spec, says:
//
// It is normally desirable to put segments with different characteristics
// in separate 256 Mbyte portions of the address space, to give the
// operating system full paging flexibility in the 64-bit address space.
//
// And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers
// use 0x10000000 as the starting address.
DefaultImageBase = 0x10000000;
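// 0x7fe00008 encodes `tw 31, 0, 0`, an unconditional trap, used to fill
// padding in executable sections.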
write32(TrapInstr.data(), 0x7fe00008);
}
static uint32_t getEFlags(InputFile *File) {
if (Config->EKind == ELF64BEKind)
return cast<ObjFile<ELF64BE>>(File)->getObj().getHeader()->e_flags;
return cast<ObjFile<ELF64LE>>(File)->getObj().getHeader()->e_flags;
}
// This file implements the v2 ABI. This function makes sure that all
// object files have v2 or an unspecified version as their ABI version.
uint32_t PPC64::calcEFlags() const {
for (InputFile *F : ObjectFiles) {
uint32_t Flag = getEFlags(F);
if (Flag == 1)
error(toString(F) + ": ABI version 1 is not supported");
else if (Flag > 2)
error(toString(F) + ": unrecognized e_flags: " + Twine(Flag));
}
return 2;
}
void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement.
// The general dynamic code sequence for a global `x` will look like:
// Instruction Relocation Symbol
// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
// R_PPC64_REL24 __tls_get_addr
// nop None None
// Relaxing to local exec entails converting:
// addis r3, r2, x@got@tlsgd@ha into nop
// addi r3, r3, x@got@tlsgd@l into addis r3, r13, x@tprel@ha
// bl __tls_get_addr(x@tlsgd) into nop
// nop into addi r3, r3, x@tprel@l
switch (Type) {
case R_PPC64_GOT_TLSGD16_HA:
writeInstrFromHalf16(Loc, 0x60000000); // nop
break;
case R_PPC64_GOT_TLSGD16:
case R_PPC64_GOT_TLSGD16_LO:
writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13
relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
break;
case R_PPC64_TLSGD:
write32(Loc, 0x60000000); // nop
write32(Loc + 4, 0x38630000); // addi r3, r3
// Since we are relocating a half16 type relocation and Loc + 4 points to
// the start of an instruction, we need to advance the buffer by an extra
// 2 bytes on BE.
relocateOne(Loc + 4 + (Config->EKind == ELF64BEKind ? 2 : 0),
R_PPC64_TPREL16_LO, Val);
break;
default:
llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
}
}
void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement.
// The local dynamic code sequence for a global `x` will look like:
// Instruction Relocation Symbol
// addis r3, r2, x@got@tlsld@ha R_PPC64_GOT_TLSLD16_HA x
// addi r3, r3, x@got@tlsld@l R_PPC64_GOT_TLSLD16_LO x
// bl __tls_get_addr(x@tlsld) R_PPC64_TLSLD x
// R_PPC64_REL24 __tls_get_addr
// nop None None
// Relaxing to local exec entails converting:
// addis r3, r2, x@got@tlsld@ha into nop
// addi r3, r3, x@got@tlsld@l into addis r3, r13, 0
// bl __tls_get_addr(x@tlsld) into nop
// nop into addi r3, r3, 4096
switch (Type) {
case R_PPC64_GOT_TLSLD16_HA:
writeInstrFromHalf16(Loc, 0x60000000); // nop
break;
case R_PPC64_GOT_TLSLD16_LO:
writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13, 0
break;
case R_PPC64_TLSLD:
write32(Loc, 0x60000000); // nop
write32(Loc + 4, 0x38631000); // addi r3, r3, 4096
break;
case R_PPC64_DTPREL16:
case R_PPC64_DTPREL16_HA:
case R_PPC64_DTPREL16_HI:
case R_PPC64_DTPREL16_DS:
case R_PPC64_DTPREL16_LO:
case R_PPC64_DTPREL16_LO_DS:
case R_PPC64_GOT_DTPREL16_HA:
case R_PPC64_GOT_DTPREL16_LO_DS:
case R_PPC64_GOT_DTPREL16_DS:
case R_PPC64_GOT_DTPREL16_HI:
relocateOne(Loc, Type, Val);
break;
default:
llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");
}
}
static unsigned getDFormOp(unsigned SecondaryOp) {
switch (SecondaryOp) {
case LBZX:
return LBZ;
case LHZX:
return LHZ;
case LWZX:
return LWZ;
case LDX:
return LD;
case STBX:
return STB;
case STHX:
return STH;
case STWX:
return STW;
case STDX:
return STD;
case ADD:
return ADDI;
default:
error("unrecognized instruction for IE to LE R_PPC64_TLS");
return 0;
}
}
void PPC64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// The initial exec code sequence for a global `x` will look like:
// Instruction Relocation Symbol
// addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x
// ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x
// add r9, r9, x@tls R_PPC64_TLS x
// Relaxing to local exec entails converting:
// addis r9, r2, x@got@tprel@ha into nop
// ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha
// add r9, r9, x@tls into addi r9, r9, x@tprel@l
// x@tls (R_PPC64_TLS) is a relocation which does not compute anything;
// it is replaced with r13 (the thread pointer).
// The add instruction in the initial exec sequence has multiple variations
// that need to be handled. If we are building an address it will use an add
// instruction, if we are accessing memory it will use any of the X-form
// indexed load or store instructions.
unsigned Offset = (Config->EKind == ELF64BEKind) ? 2 : 0;
switch (Type) {
case R_PPC64_GOT_TPREL16_HA:
write32(Loc - Offset, 0x60000000); // nop
break;
case R_PPC64_GOT_TPREL16_LO_DS:
case R_PPC64_GOT_TPREL16_DS: {
uint32_t RegNo = read32(Loc - Offset) & 0x03E00000; // bits 6-10
write32(Loc - Offset, 0x3C0D0000 | RegNo); // addis RegNo, r13
relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
break;
}
case R_PPC64_TLS: {
uint32_t PrimaryOp = getPrimaryOpCode(read32(Loc));
if (PrimaryOp != 31)
error("unrecognized instruction for IE to LE R_PPC64_TLS");
uint32_t SecondaryOp = (read32(Loc) & 0x000007FE) >> 1; // bits 21-30
uint32_t DFormOp = getDFormOp(SecondaryOp);
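// Swap the X-form primary opcode (bits 0-5) for the matching D-form one and
// keep the remaining operand fields; the 16-bit displacement is then written
// by the R_PPC64_TPREL16_LO relocation below.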
write32(Loc, ((DFormOp << 26) | (read32(Loc) & 0x03FFFFFF)));
relocateOne(Loc + Offset, R_PPC64_TPREL16_LO, Val);
break;
}
default:
llvm_unreachable("unknown relocation for IE to LE");
break;
}
}
RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const {
switch (Type) {
case R_PPC64_GOT16:
case R_PPC64_GOT16_DS:
case R_PPC64_GOT16_HA:
case R_PPC64_GOT16_HI:
case R_PPC64_GOT16_LO:
case R_PPC64_GOT16_LO_DS:
return R_GOT_OFF;
case R_PPC64_TOC16:
case R_PPC64_TOC16_DS:
case R_PPC64_TOC16_HA:
case R_PPC64_TOC16_HI:
case R_PPC64_TOC16_LO:
case R_PPC64_TOC16_LO_DS:
return R_GOTREL;
case R_PPC64_TOC:
return R_PPC_TOC;
case R_PPC64_REL14:
case R_PPC64_REL24:
return R_PPC_CALL_PLT;
case R_PPC64_REL16_LO:
case R_PPC64_REL16_HA:
case R_PPC64_REL32:
case R_PPC64_REL64:
return R_PC;
case R_PPC64_GOT_TLSGD16:
case R_PPC64_GOT_TLSGD16_HA:
case R_PPC64_GOT_TLSGD16_HI:
case R_PPC64_GOT_TLSGD16_LO:
return R_TLSGD_GOT;
case R_PPC64_GOT_TLSLD16:
case R_PPC64_GOT_TLSLD16_HA:
case R_PPC64_GOT_TLSLD16_HI:
case R_PPC64_GOT_TLSLD16_LO:
return R_TLSLD_GOT;
case R_PPC64_GOT_TPREL16_HA:
case R_PPC64_GOT_TPREL16_LO_DS:
case R_PPC64_GOT_TPREL16_DS:
case R_PPC64_GOT_TPREL16_HI:
return R_GOT_OFF;
case R_PPC64_GOT_DTPREL16_HA:
case R_PPC64_GOT_DTPREL16_LO_DS:
case R_PPC64_GOT_DTPREL16_DS:
case R_PPC64_GOT_DTPREL16_HI:
return R_TLSLD_GOT_OFF;
case R_PPC64_TPREL16:
case R_PPC64_TPREL16_HA:
case R_PPC64_TPREL16_LO:
case R_PPC64_TPREL16_HI:
case R_PPC64_TPREL16_DS:
case R_PPC64_TPREL16_LO_DS:
case R_PPC64_TPREL16_HIGHER:
case R_PPC64_TPREL16_HIGHERA:
case R_PPC64_TPREL16_HIGHEST:
case R_PPC64_TPREL16_HIGHESTA:
return R_TLS;
case R_PPC64_DTPREL16:
case R_PPC64_DTPREL16_DS:
case R_PPC64_DTPREL16_HA:
case R_PPC64_DTPREL16_HI:
case R_PPC64_DTPREL16_HIGHER:
case R_PPC64_DTPREL16_HIGHERA:
case R_PPC64_DTPREL16_HIGHEST:
case R_PPC64_DTPREL16_HIGHESTA:
case R_PPC64_DTPREL16_LO:
case R_PPC64_DTPREL16_LO_DS:
case R_PPC64_DTPREL64:
return R_ABS;
case R_PPC64_TLSGD:
return R_TLSDESC_CALL;
case R_PPC64_TLSLD:
return R_TLSLD_HINT;
case R_PPC64_TLS:
return R_TLSIE_HINT;
default:
return R_ABS;
}
}
void PPC64::writeGotHeader(uint8_t *Buf) const {
write64(Buf, getPPC64TocBase());
}
void PPC64::writePltHeader(uint8_t *Buf) const {
// The generic resolver stub goes first.
write32(Buf + 0, 0x7c0802a6); // mflr r0
write32(Buf + 4, 0x429f0005); // bcl 20,4*cr7+so,8 <_glink+0x8>
write32(Buf + 8, 0x7d6802a6); // mflr r11
write32(Buf + 12, 0x7c0803a6); // mtlr r0
write32(Buf + 16, 0x7d8b6050); // subf r12, r11, r12
write32(Buf + 20, 0x380cffcc); // subi r0,r12,52
write32(Buf + 24, 0x7800f082); // srdi r0,r0,62,2
write32(Buf + 28, 0xe98b002c); // ld r12,44(r11)
write32(Buf + 32, 0x7d6c5a14); // add r11,r12,r11
write32(Buf + 36, 0xe98b0000); // ld r12,0(r11)
write32(Buf + 40, 0xe96b0008); // ld r11,8(r11)
write32(Buf + 44, 0x7d8903a6); // mtctr r12
write32(Buf + 48, 0x4e800420); // bctr
// The 'bcl' instruction will set the link register to the address of the
// following instruction ('mflr r11'). Here we store the offset from that
// instruction to the first entry in the GotPlt section.
int64_t GotPltOffset = In.GotPlt->getVA() - (In.Plt->getVA() + 8);
write64(Buf + 52, GotPltOffset);
}
void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
uint64_t PltEntryAddr, int32_t Index,
unsigned RelOff) const {
int32_t Offset = PltHeaderSize + Index * PltEntrySize;
// bl __glink_PLTresolve
write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc));
}
static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
// Relocations relative to the TOC base need to be adjusted by the TOC offset.
uint64_t TocBiasedVal = Val - PPC64TocOffset;
// Relocations relative to dtv[dtpmod] need to be adjusted by the DTP offset.
uint64_t DTPBiasedVal = Val - DynamicThreadPointerOffset;
switch (Type) {
// TOC biased relocation.
case R_PPC64_GOT16:
case R_PPC64_GOT_TLSGD16:
case R_PPC64_GOT_TLSLD16:
case R_PPC64_TOC16:
return {R_PPC64_ADDR16, TocBiasedVal};
case R_PPC64_GOT16_DS:
case R_PPC64_TOC16_DS:
case R_PPC64_GOT_TPREL16_DS:
case R_PPC64_GOT_DTPREL16_DS:
return {R_PPC64_ADDR16_DS, TocBiasedVal};
case R_PPC64_GOT16_HA:
case R_PPC64_GOT_TLSGD16_HA:
case R_PPC64_GOT_TLSLD16_HA:
case R_PPC64_GOT_TPREL16_HA:
case R_PPC64_GOT_DTPREL16_HA:
case R_PPC64_TOC16_HA:
return {R_PPC64_ADDR16_HA, TocBiasedVal};
case R_PPC64_GOT16_HI:
case R_PPC64_GOT_TLSGD16_HI:
case R_PPC64_GOT_TLSLD16_HI:
case R_PPC64_GOT_TPREL16_HI:
case R_PPC64_GOT_DTPREL16_HI:
case R_PPC64_TOC16_HI:
return {R_PPC64_ADDR16_HI, TocBiasedVal};
case R_PPC64_GOT16_LO:
case R_PPC64_GOT_TLSGD16_LO:
case R_PPC64_GOT_TLSLD16_LO:
case R_PPC64_TOC16_LO:
return {R_PPC64_ADDR16_LO, TocBiasedVal};
case R_PPC64_GOT16_LO_DS:
case R_PPC64_TOC16_LO_DS:
case R_PPC64_GOT_TPREL16_LO_DS:
case R_PPC64_GOT_DTPREL16_LO_DS:
return {R_PPC64_ADDR16_LO_DS, TocBiasedVal};
// Dynamic Thread pointer biased relocation types.
case R_PPC64_DTPREL16:
return {R_PPC64_ADDR16, DTPBiasedVal};
case R_PPC64_DTPREL16_DS:
return {R_PPC64_ADDR16_DS, DTPBiasedVal};
case R_PPC64_DTPREL16_HA:
return {R_PPC64_ADDR16_HA, DTPBiasedVal};
case R_PPC64_DTPREL16_HI:
return {R_PPC64_ADDR16_HI, DTPBiasedVal};
case R_PPC64_DTPREL16_HIGHER:
return {R_PPC64_ADDR16_HIGHER, DTPBiasedVal};
case R_PPC64_DTPREL16_HIGHERA:
return {R_PPC64_ADDR16_HIGHERA, DTPBiasedVal};
case R_PPC64_DTPREL16_HIGHEST:
return {R_PPC64_ADDR16_HIGHEST, DTPBiasedVal};
case R_PPC64_DTPREL16_HIGHESTA:
return {R_PPC64_ADDR16_HIGHESTA, DTPBiasedVal};
case R_PPC64_DTPREL16_LO:
return {R_PPC64_ADDR16_LO, DTPBiasedVal};
case R_PPC64_DTPREL16_LO_DS:
return {R_PPC64_ADDR16_LO_DS, DTPBiasedVal};
case R_PPC64_DTPREL64:
return {R_PPC64_ADDR64, DTPBiasedVal};
default:
return {Type, Val};
}
}
static bool isTocOptType(RelType Type) {
switch (Type) {
case R_PPC64_GOT16_HA:
case R_PPC64_GOT16_LO_DS:
case R_PPC64_TOC16_HA:
case R_PPC64_TOC16_LO_DS:
case R_PPC64_TOC16_LO:
return true;
default:
return false;
}
}
void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
// We need to save the original relocation type to use in diagnostics, and
// use the original type to determine if we should toc-optimize the
// instructions being relocated.
RelType OriginalType = Type;
bool ShouldTocOptimize = isTocOptType(Type);
// For dynamic thread pointer relative, toc-relative, and got-indirect
// relocations, proceed in terms of the corresponding ADDR16 relocation type.
std::tie(Type, Val) = toAddr16Rel(Type, Val);
switch (Type) {
case R_PPC64_ADDR14: {
checkAlignment(Loc, Val, 4, Type);
// Preserve the AA/LK bits in the branch instruction
uint8_t AALK = Loc[3];
write16(Loc + 2, (AALK & 3) | (Val & 0xfffc));
break;
}
case R_PPC64_ADDR16:
case R_PPC64_TPREL16:
checkInt(Loc, Val, 16, OriginalType);
write16(Loc, Val);
break;
case R_PPC64_ADDR16_DS:
case R_PPC64_TPREL16_DS: {
checkInt(Loc, Val, 16, OriginalType);
// DQ-form instructions use bits 28-31 as part of the instruction encoding
// DS-form instructions only use bits 30-31.
uint16_t Mask = isDQFormInstruction(readInstrFromHalf16(Loc)) ? 0xF : 0x3;
checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
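// Merge the displacement into the halfword while preserving the low
// instruction bits (the DS/DQ extended-opcode bits) that share it.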
write16(Loc, (read16(Loc) & Mask) | lo(Val));
} break;
case R_PPC64_ADDR16_HA:
case R_PPC64_REL16_HA:
case R_PPC64_TPREL16_HA:
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0)
writeInstrFromHalf16(Loc, 0x60000000);
else
write16(Loc, ha(Val));
break;
case R_PPC64_ADDR16_HI:
case R_PPC64_REL16_HI:
case R_PPC64_TPREL16_HI:
write16(Loc, hi(Val));
break;
case R_PPC64_ADDR16_HIGHER:
case R_PPC64_TPREL16_HIGHER:
write16(Loc, higher(Val));
break;
case R_PPC64_ADDR16_HIGHERA:
case R_PPC64_TPREL16_HIGHERA:
write16(Loc, highera(Val));
break;
case R_PPC64_ADDR16_HIGHEST:
case R_PPC64_TPREL16_HIGHEST:
write16(Loc, highest(Val));
break;
case R_PPC64_ADDR16_HIGHESTA:
case R_PPC64_TPREL16_HIGHESTA:
write16(Loc, highesta(Val));
break;
case R_PPC64_ADDR16_LO:
case R_PPC64_REL16_LO:
case R_PPC64_TPREL16_LO:
// When the high-adjusted part of a toc relocation evaluates to 0, it is
// changed into a nop. The lo part then needs to be updated to use the
// toc-pointer register r2 as the base register.
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
uint32_t Instr = readInstrFromHalf16(Loc);
if (isInstructionUpdateForm(Instr))
error(getErrorLocation(Loc) +
"can't toc-optimize an update instruction: 0x" +
utohexstr(Instr));
Instr = (Instr & 0xFFE00000) | 0x00020000;
writeInstrFromHalf16(Loc, Instr);
}
write16(Loc, lo(Val));
break;
case R_PPC64_ADDR16_LO_DS:
case R_PPC64_TPREL16_LO_DS: {
// DQ-form instructions use bits 28-31 as part of the instruction encoding
// DS-form instructions only use bits 30-31.
uint32_t Inst = readInstrFromHalf16(Loc);
uint16_t Mask = isDQFormInstruction(Inst) ? 0xF : 0x3;
checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
// When the high-adjusted part of a toc relocation evaluates to 0, it is
// changed into a nop. The lo part then needs to be updated to use the toc
// pointer register r2 as the base register.
if (isInstructionUpdateForm(Inst))
error(getErrorLocation(Loc) +
"can't toc-optimize an update instruction: 0x" +
Twine::utohexstr(Inst));
Inst = (Inst & 0xFFE0000F) | 0x00020000;
writeInstrFromHalf16(Loc, Inst);
}
write16(Loc, (read16(Loc) & Mask) | lo(Val));
} break;
case R_PPC64_ADDR32:
case R_PPC64_REL32:
checkInt(Loc, Val, 32, Type);
write32(Loc, Val);
break;
case R_PPC64_ADDR64:
case R_PPC64_REL64:
case R_PPC64_TOC:
write64(Loc, Val);
break;
case R_PPC64_REL14: {
uint32_t Mask = 0x0000FFFC;
checkInt(Loc, Val, 16, Type);
checkAlignment(Loc, Val, 4, Type);
write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
break;
}
case R_PPC64_REL24: {
uint32_t Mask = 0x03FFFFFC;
checkInt(Loc, Val, 26, Type);
checkAlignment(Loc, Val, 4, Type);
write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
break;
}
case R_PPC64_DTPREL64:
write64(Loc, Val - DynamicThreadPointerOffset);
break;
default:
error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
}
}
bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const {
if (Type != R_PPC64_REL14 && Type != R_PPC64_REL24)
return false;
// If a function is in the Plt it needs to be called with a call-stub.
if (S.isInPlt())
return true;
// If a symbol is a weak undefined and we are compiling an executable
// it doesn't need a range-extending thunk since it can't be called.
if (S.isUndefWeak() && !Config->Shared)
return false;
// If the offset exceeds the range of the branch type then it will need
// a range-extending thunk.
- return !inBranchRange(Type, BranchAddr, S.getVA());
+ // See the comment in getRelocTargetVA() about R_PPC64_CALL.
+ return !inBranchRange(Type, BranchAddr,
+ S.getVA() +
+ getPPC64GlobalEntryToLocalEntryOffset(S.StOther));
}
uint32_t PPC64::getThunkSectionSpacing() const {
// See comment in Arch/ARM.cpp for a more detailed explanation of
// getThunkSectionSpacing(). For PPC64 we pick the constant here based on
// R_PPC64_REL24, which is used by unconditional branch instructions.
// 0x2000000 = (1 << (24 - 1)) * 4 = 32 MiB
return 0x2000000;
}
bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
int64_t Offset = Dst - Src;
if (Type == R_PPC64_REL14)
return isInt<16>(Offset);
if (Type == R_PPC64_REL24)
return isInt<26>(Offset);
llvm_unreachable("unsupported relocation type used in branch");
}
RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const {
if (Expr == R_RELAX_TLS_GD_TO_IE)
return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
if (Expr == R_RELAX_TLS_LD_TO_LE)
return R_RELAX_TLS_LD_TO_LE_ABS;
return Expr;
}
// Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement.
// The general dynamic code sequence for a global `x` uses 4 instructions.
// Instruction Relocation Symbol
// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
// R_PPC64_REL24 __tls_get_addr
// nop None None
//
// Relaxing to initial-exec entails:
// 1) Convert the addis/addi pair that builds the address of the tls_index
// struct for 'x' to an addis/ld pair that loads an offset from a got-entry.
// 2) Convert the call to __tls_get_addr to a nop.
// 3) Convert the nop following the call to an add of the loaded offset to the
// thread pointer.
// Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is
// used as the relaxation hint for both steps 2 and 3.
void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
switch (Type) {
case R_PPC64_GOT_TLSGD16_HA:
// This is relaxed from addis rT, r2, sym@got@tlsgd@ha to
// addis rT, r2, sym@got@tprel@ha.
relocateOne(Loc, R_PPC64_GOT_TPREL16_HA, Val);
return;
case R_PPC64_GOT_TLSGD16_LO: {
// Relax from addi r3, rA, sym@got@tlsgd@l to
// ld r3, sym@got@tprel@l(rA)
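// 0xE8600000 is `ld r3, 0(0)`; OR-ing in the RA field (mask 0x1f << 16)
// extracted from the original addi keeps the same base register.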
uint32_t InputRegister = (readInstrFromHalf16(Loc) & (0x1f << 16));
writeInstrFromHalf16(Loc, 0xE8600000 | InputRegister);
relocateOne(Loc, R_PPC64_GOT_TPREL16_LO_DS, Val);
return;
}
case R_PPC64_TLSGD:
write32(Loc, 0x60000000); // bl __tls_get_addr(sym@tlsgd) --> nop
write32(Loc + 4, 0x7c636A14); // nop --> add r3, r3, r13
return;
default:
llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
}
}
// The prologue for a split-stack function is expected to look roughly
// like this:
// .Lglobal_entry_point:
// # TOC pointer initialization.
// ...
// .Llocal_entry_point:
// # load the __private_ss member of the thread's tcbhead.
// ld r0,-0x7000-64(r13)
// # subtract the function's stack size from the stack pointer.
// addis r12, r1, ha(-stack-frame size)
// addi r12, r12, l(-stack-frame size)
// # compare the needed stack to the actual stack pointer and branch to
// # allocate_more_stack if more space is needed; otherwise fall through to
// # the 'normal' function body.
// cmpld cr7,r12,r0
// blt- cr7, .Lallocate_more_stack
//
// -) The allocate_more_stack block might be placed after the split-stack
// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
// instead.
// -) If either the addis or addi is not needed, because the stack size is
// smaller than 32K or a multiple of 64K, it will be replaced with a nop,
// but there will always be 2 instructions the linker can overwrite for the
// adjusted stack size.
//
// The linker's job here is to increase the stack size used in the addis/addi
// pair by split-stack-size-adjust.
// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
uint8_t StOther) const {
// If the caller has a global entry point adjust the buffer past it. The start
// of the split-stack prologue will be at the local entry point.
Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther);
// At the very least we expect to see a load of some split-stack data from the
// tcb, and 2 instructions that calculate the ending stack address this
// function will require. If there is not enough room for at least 3
// instructions it can't be a split-stack prologue.
if (Loc + 12 >= End)
return false;
// First instruction must be `ld r0, -0x7000-64(r13)`
if (read32(Loc) != 0xe80d8fc0)
return false;
int16_t HiImm = 0;
int16_t LoImm = 0;
// The first instruction can be either an addis if the frame size is larger
// than 32K, or an addi if the size is less than 32K.
int32_t FirstInstr = read32(Loc + 4);
if (getPrimaryOpCode(FirstInstr) == 15) {
HiImm = FirstInstr & 0xFFFF;
} else if (getPrimaryOpCode(FirstInstr) == 14) {
LoImm = FirstInstr & 0xFFFF;
} else {
return false;
}
// The second instruction is either an addi or a nop. If the first instruction
// was an addi then LoImm is set and the second instruction must be a nop.
uint32_t SecondInstr = read32(Loc + 8);
if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) {
LoImm = SecondInstr & 0xFFFF;
} else if (SecondInstr != 0x60000000) {
return false;
}
// The register operands of the first instruction should be the stack-pointer
// (r1) as the input (RA) and r12 as the output (RT). If the second
// instruction is not a nop, then it should use r12 as both input and output.
auto CheckRegOperands = [](uint32_t Instr, uint8_t ExpectedRT,
uint8_t ExpectedRA) {
return ((Instr & 0x3E00000) >> 21 == ExpectedRT) &&
((Instr & 0x1F0000) >> 16 == ExpectedRA);
};
if (!CheckRegOperands(FirstInstr, 12, 1))
return false;
if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12))
return false;
int32_t StackFrameSize = (HiImm * 65536) + LoImm;
// Check that the adjusted size doesn't overflow what we can represent with 2
// instructions.
if (StackFrameSize < Config->SplitStackAdjustSize + INT32_MIN) {
error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows");
return false;
}
int32_t AdjustedStackFrameSize =
StackFrameSize - Config->SplitStackAdjustSize;
LoImm = AdjustedStackFrameSize & 0xFFFF;
HiImm = (AdjustedStackFrameSize + 0x8000) >> 16;
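// Adding 0x8000 before the shift rounds HiImm so that the sign-extended
// LoImm corrects the difference. For example, an adjusted size of -0x9000
// gives LoImm = 0x7000 and HiImm = -1: the addis subtracts 0x10000 and the
// addi adds back 0x7000.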
if (HiImm) {
write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm);
// If the low immediate is zero the second instruction will be a nop.
SecondInstr = LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000;
write32(Loc + 8, SecondInstr);
} else {
// addi r12, r1, imm
write32(Loc + 4, (0x39810000) | (uint16_t)LoImm);
write32(Loc + 8, 0x60000000);
}
return true;
}
TargetInfo *elf::getPPC64TargetInfo() {
static PPC64 Target;
return &Target;
}
Index: head/contrib/llvm/tools/lld
===================================================================
--- head/contrib/llvm/tools/lld (revision 349792)
+++ head/contrib/llvm/tools/lld (revision 349793)
Property changes on: head/contrib/llvm/tools/lld
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/lld/dist-release_80:r348963-349790
Index: head/contrib/llvm/tools/lldb
===================================================================
--- head/contrib/llvm/tools/lldb (revision 349792)
+++ head/contrib/llvm/tools/lldb (revision 349793)
Property changes on: head/contrib/llvm/tools/lldb
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/lldb/dist-release_80:r348963-349790
Index: head/contrib/llvm
===================================================================
--- head/contrib/llvm (revision 349792)
+++ head/contrib/llvm (revision 349793)
Property changes on: head/contrib/llvm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/llvm/dist-release_80:r348963-349790
Index: head/contrib/openmp/runtime/src/kmp_atomic.h
===================================================================
--- head/contrib/openmp/runtime/src/kmp_atomic.h (revision 349792)
+++ head/contrib/openmp/runtime/src/kmp_atomic.h (revision 349793)
@@ -1,1776 +1,1776 @@
/*
* kmp_atomic.h - ATOMIC header file
*/
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
#ifndef KMP_ATOMIC_H
#define KMP_ATOMIC_H
#include "kmp_lock.h"
#include "kmp_os.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
// C++ build port.
// The Intel compiler does not support the _Complex datatype on win, but it
// does support it on lin and mac.
// On the other hand, there is a problem of stack alignment on lin_32 and
// mac_32 if the rhs is a cmplx80 or cmplx128 typedef'ed datatype.
// The decision is: use the compiler-supported _Complex type on lin and mac,
// and the typedef'ed types on win.
// Condition for WIN64 was modified in anticipation of 10.1 build compiler.
#if defined(__cplusplus) && (KMP_OS_WINDOWS)
// create shortcuts for c99 complex types
// Visual Studio cannot have function parameters that have the
// align __declspec attribute, so we must remove it. (Compiler Error C2719)
#if KMP_COMPILER_MSVC
#undef KMP_DO_ALIGN
#define KMP_DO_ALIGN(alignment) /* Nothing */
#endif
#if (_MSC_VER < 1600) && defined(_DEBUG)
// Workaround for the _DebugHeapTag unresolved-external problem.
// This problem prevented using our static debug library for C tests
// compiled with the /MDd option (the library itself is built with /MTd).
#undef _DEBUG
#define _DEBUG_TEMPORARILY_UNSET_
#endif
#include <complex>
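// __kmp_lhs_div_rhs implements the textbook complex division
// (a+bi)/(c+di) = ((ac+bd) + (bc-ad)i) / (c^2 + d^2), with no special
// protection against intermediate overflow in the denominator.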
template <typename type_lhs, typename type_rhs>
std::complex<type_lhs> __kmp_lhs_div_rhs(const std::complex<type_lhs> &lhs,
const std::complex<type_rhs> &rhs) {
type_lhs a = lhs.real();
type_lhs b = lhs.imag();
type_rhs c = rhs.real();
type_rhs d = rhs.imag();
type_rhs den = c * c + d * d;
type_rhs r = (a * c + b * d);
type_rhs i = (b * c - a * d);
std::complex<type_lhs> ret(r / den, i / den);
return ret;
}
// complex8
struct __kmp_cmplx64_t : std::complex<double> {
__kmp_cmplx64_t() : std::complex<double>() {}
__kmp_cmplx64_t(const std::complex<double> &cd) : std::complex<double>(cd) {}
void operator/=(const __kmp_cmplx64_t &rhs) {
std::complex<double> lhs = *this;
*this = __kmp_lhs_div_rhs(lhs, rhs);
}
__kmp_cmplx64_t operator/(const __kmp_cmplx64_t &rhs) {
std::complex<double> lhs = *this;
return __kmp_lhs_div_rhs(lhs, rhs);
}
};
typedef struct __kmp_cmplx64_t kmp_cmplx64;
// complex4
struct __kmp_cmplx32_t : std::complex<float> {
__kmp_cmplx32_t() : std::complex<float>() {}
__kmp_cmplx32_t(const std::complex<float> &cf) : std::complex<float>(cf) {}
__kmp_cmplx32_t operator+(const __kmp_cmplx32_t &b) {
std::complex<float> lhs = *this;
std::complex<float> rhs = b;
return (lhs + rhs);
}
__kmp_cmplx32_t operator-(const __kmp_cmplx32_t &b) {
std::complex<float> lhs = *this;
std::complex<float> rhs = b;
return (lhs - rhs);
}
__kmp_cmplx32_t operator*(const __kmp_cmplx32_t &b) {
std::complex<float> lhs = *this;
std::complex<float> rhs = b;
return (lhs * rhs);
}
__kmp_cmplx32_t operator+(const kmp_cmplx64 &b) {
kmp_cmplx64 t = kmp_cmplx64(*this) + b;
std::complex<double> d(t);
std::complex<float> f(d);
__kmp_cmplx32_t r(f);
return r;
}
__kmp_cmplx32_t operator-(const kmp_cmplx64 &b) {
kmp_cmplx64 t = kmp_cmplx64(*this) - b;
std::complex<double> d(t);
std::complex<float> f(d);
__kmp_cmplx32_t r(f);
return r;
}
__kmp_cmplx32_t operator*(const kmp_cmplx64 &b) {
kmp_cmplx64 t = kmp_cmplx64(*this) * b;
std::complex<double> d(t);
std::complex<float> f(d);
__kmp_cmplx32_t r(f);
return r;
}
void operator/=(const __kmp_cmplx32_t &rhs) {
std::complex<float> lhs = *this;
*this = __kmp_lhs_div_rhs(lhs, rhs);
}
__kmp_cmplx32_t operator/(const __kmp_cmplx32_t &rhs) {
std::complex<float> lhs = *this;
return __kmp_lhs_div_rhs(lhs, rhs);
}
void operator/=(const kmp_cmplx64 &rhs) {
std::complex<float> lhs = *this;
*this = __kmp_lhs_div_rhs(lhs, rhs);
}
__kmp_cmplx32_t operator/(const kmp_cmplx64 &rhs) {
std::complex<float> lhs = *this;
return __kmp_lhs_div_rhs(lhs, rhs);
}
};
typedef struct __kmp_cmplx32_t kmp_cmplx32;
// complex10
struct KMP_DO_ALIGN(16) __kmp_cmplx80_t : std::complex<long double> {
__kmp_cmplx80_t() : std::complex<long double>() {}
__kmp_cmplx80_t(const std::complex<long double> &cld)
: std::complex<long double>(cld) {}
void operator/=(const __kmp_cmplx80_t &rhs) {
std::complex<long double> lhs = *this;
*this = __kmp_lhs_div_rhs(lhs, rhs);
}
__kmp_cmplx80_t operator/(const __kmp_cmplx80_t &rhs) {
std::complex<long double> lhs = *this;
return __kmp_lhs_div_rhs(lhs, rhs);
}
};
typedef KMP_DO_ALIGN(16) struct __kmp_cmplx80_t kmp_cmplx80;
// complex16
#if KMP_HAVE_QUAD
struct __kmp_cmplx128_t : std::complex<_Quad> {
__kmp_cmplx128_t() : std::complex<_Quad>() {}
__kmp_cmplx128_t(const std::complex<_Quad> &cq) : std::complex<_Quad>(cq) {}
void operator/=(const __kmp_cmplx128_t &rhs) {
std::complex<_Quad> lhs = *this;
*this = __kmp_lhs_div_rhs(lhs, rhs);
}
__kmp_cmplx128_t operator/(const __kmp_cmplx128_t &rhs) {
std::complex<_Quad> lhs = *this;
return __kmp_lhs_div_rhs(lhs, rhs);
}
};
typedef struct __kmp_cmplx128_t kmp_cmplx128;
#endif /* KMP_HAVE_QUAD */
#ifdef _DEBUG_TEMPORARILY_UNSET_
#undef _DEBUG_TEMPORARILY_UNSET_
// Set it back now
#define _DEBUG 1
#endif
#else
// create shortcuts for c99 complex types
typedef float _Complex kmp_cmplx32;
typedef double _Complex kmp_cmplx64;
typedef long double _Complex kmp_cmplx80;
#if KMP_HAVE_QUAD
typedef _Quad _Complex kmp_cmplx128;
#endif
#endif
// Compiler 12.0 changed the alignment of 16- and 32-byte arguments (like _Quad
// and kmp_cmplx128) on the IA-32 architecture. The following aligned structures
// are implemented to support the old alignment in 10.1, 11.0, and 11.1, and to
// introduce the new alignment in 12.0. See CQ88405.
#if KMP_ARCH_X86 && KMP_HAVE_QUAD
// 4-byte aligned structures for backward compatibility.
#pragma pack(push, 4)
struct KMP_DO_ALIGN(4) Quad_a4_t {
_Quad q;
Quad_a4_t() : q() {}
Quad_a4_t(const _Quad &cq) : q(cq) {}
Quad_a4_t operator+(const Quad_a4_t &b) {
_Quad lhs = (*this).q;
_Quad rhs = b.q;
return (Quad_a4_t)(lhs + rhs);
}
Quad_a4_t operator-(const Quad_a4_t &b) {
_Quad lhs = (*this).q;
_Quad rhs = b.q;
return (Quad_a4_t)(lhs - rhs);
}
Quad_a4_t operator*(const Quad_a4_t &b) {
_Quad lhs = (*this).q;
_Quad rhs = b.q;
return (Quad_a4_t)(lhs * rhs);
}
Quad_a4_t operator/(const Quad_a4_t &b) {
_Quad lhs = (*this).q;
_Quad rhs = b.q;
return (Quad_a4_t)(lhs / rhs);
}
};
struct KMP_DO_ALIGN(4) kmp_cmplx128_a4_t {
kmp_cmplx128 q;
kmp_cmplx128_a4_t() : q() {}
kmp_cmplx128_a4_t(const kmp_cmplx128 &c128) : q(c128) {}
kmp_cmplx128_a4_t operator+(const kmp_cmplx128_a4_t &b) {
kmp_cmplx128 lhs = (*this).q;
kmp_cmplx128 rhs = b.q;
return (kmp_cmplx128_a4_t)(lhs + rhs);
}
kmp_cmplx128_a4_t operator-(const kmp_cmplx128_a4_t &b) {
kmp_cmplx128 lhs = (*this).q;
kmp_cmplx128 rhs = b.q;
return (kmp_cmplx128_a4_t)(lhs - rhs);
}
kmp_cmplx128_a4_t operator*(const kmp_cmplx128_a4_t &b) {
kmp_cmplx128 lhs = (*this).q;
kmp_cmplx128 rhs = b.q;
return (kmp_cmplx128_a4_t)(lhs * rhs);
}
kmp_cmplx128_a4_t operator/(const kmp_cmplx128_a4_t &b) {
kmp_cmplx128 lhs = (*this).q;
kmp_cmplx128 rhs = b.q;
return (kmp_cmplx128_a4_t)(lhs / rhs);
}
};
#pragma pack(pop)
// New 16-byte aligned structures for 12.0 compiler.
struct KMP_DO_ALIGN(16) Quad_a16_t {
_Quad q;
Quad_a16_t() : q() {}
Quad_a16_t(const _Quad &cq) : q(cq) {}
Quad_a16_t operator+(const Quad_a16_t &b) {
_Quad lhs = (*this).q;
_Quad rhs = b.q;
return (Quad_a16_t)(lhs + rhs);
}
Quad_a16_t operator-(const Quad_a16_t &b) {
_Quad lhs = (*this).q;
_Quad rhs = b.q;
return (Quad_a16_t)(lhs - rhs);
}
Quad_a16_t operator*(const Quad_a16_t &b) {
_Quad lhs = (*this).q;
_Quad rhs = b.q;
return (Quad_a16_t)(lhs * rhs);
}
Quad_a16_t operator/(const Quad_a16_t &b) {
_Quad lhs = (*this).q;
_Quad rhs = b.q;
return (Quad_a16_t)(lhs / rhs);
}
};
struct KMP_DO_ALIGN(16) kmp_cmplx128_a16_t {
kmp_cmplx128 q;
kmp_cmplx128_a16_t() : q() {}
kmp_cmplx128_a16_t(const kmp_cmplx128 &c128) : q(c128) {}
kmp_cmplx128_a16_t operator+(const kmp_cmplx128_a16_t &b) {
kmp_cmplx128 lhs = (*this).q;
kmp_cmplx128 rhs = b.q;
return (kmp_cmplx128_a16_t)(lhs + rhs);
}
kmp_cmplx128_a16_t operator-(const kmp_cmplx128_a16_t &b) {
kmp_cmplx128 lhs = (*this).q;
kmp_cmplx128 rhs = b.q;
return (kmp_cmplx128_a16_t)(lhs - rhs);
}
kmp_cmplx128_a16_t operator*(const kmp_cmplx128_a16_t &b) {
kmp_cmplx128 lhs = (*this).q;
kmp_cmplx128 rhs = b.q;
return (kmp_cmplx128_a16_t)(lhs * rhs);
}
kmp_cmplx128_a16_t operator/(const kmp_cmplx128_a16_t &b) {
kmp_cmplx128 lhs = (*this).q;
kmp_cmplx128 rhs = b.q;
return (kmp_cmplx128_a16_t)(lhs / rhs);
}
};
#endif
#if (KMP_ARCH_X86)
#define QUAD_LEGACY Quad_a4_t
#define CPLX128_LEG kmp_cmplx128_a4_t
#else
#define QUAD_LEGACY _Quad
#define CPLX128_LEG kmp_cmplx128
#endif
#ifdef __cplusplus
extern "C" {
#endif
extern int __kmp_atomic_mode;
// Atomic locks can easily become contended, so we use queuing locks for them.
typedef kmp_queuing_lock_t kmp_atomic_lock_t;
static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck,
kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (ompt_wait_id_t)lck,
+ ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (ompt_wait_id_t)(uintptr_t)lck,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
__kmp_acquire_queuing_lock(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
+ ompt_mutex_atomic, (ompt_wait_id_t)(uintptr_t)lck, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
static inline int __kmp_test_atomic_lock(kmp_atomic_lock_t *lck,
kmp_int32 gtid) {
return __kmp_test_queuing_lock(lck, gtid);
}
static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck,
kmp_int32 gtid) {
__kmp_release_queuing_lock(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
+ ompt_mutex_atomic, (ompt_wait_id_t)(uintptr_t)lck, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
static inline void __kmp_init_atomic_lock(kmp_atomic_lock_t *lck) {
__kmp_init_queuing_lock(lck);
}
static inline void __kmp_destroy_atomic_lock(kmp_atomic_lock_t *lck) {
__kmp_destroy_queuing_lock(lck);
}
// Global Locks
extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded
atomics in Gnu compat mode */
extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user
coded atomics for 1-byte fixed
data types */
extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user
coded atomics for 2-byte fixed
data types */
extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user
coded atomics for 4-byte fixed
data types */
extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user
coded atomics for kmp_real32
data type */
extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user
coded atomics for 8-byte fixed
data types */
extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user
coded atomics for kmp_real64
data type */
extern kmp_atomic_lock_t
__kmp_atomic_lock_8c; /* Control access to all user coded atomics for
complex byte data type */
extern kmp_atomic_lock_t
__kmp_atomic_lock_10r; /* Control access to all user coded atomics for long
double data type */
extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user
coded atomics for _Quad data
type */
extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user
coded atomics for double
complex data type*/
extern kmp_atomic_lock_t
__kmp_atomic_lock_20c; /* Control access to all user coded atomics for long
double complex type*/
extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user
coded atomics for _Quad
complex data type */
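/* Usage sketch (an assumption for illustration, not part of this header):
   an atomic update of a type with no native hardware support is typically
   guarded with one of the per-size global locks above, e.g.
     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
     *lhs = *lhs + rhs; // the guarded long double update
     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); */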
// Below routines for atomic UPDATE are listed
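// For example, a compiler typically lowers
//   #pragma omp atomic
//   x += y;   // kmp_int32 x; kmp_int32 y
// to a call such as __kmpc_atomic_fixed4_add(&loc, gtid, &x, y)
// (an illustrative sketch; the exact lowering is compiler-dependent).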
// 1-byte
void __kmpc_atomic_fixed1_add(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1_andb(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1_div(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1u_div(ident_t *id_ref, int gtid, unsigned char *lhs,
unsigned char rhs);
void __kmpc_atomic_fixed1_mul(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1_orb(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1_shl(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1_shr(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1u_shr(ident_t *id_ref, int gtid, unsigned char *lhs,
unsigned char rhs);
void __kmpc_atomic_fixed1_sub(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1_xor(ident_t *id_ref, int gtid, char *lhs, char rhs);
// 2-byte
void __kmpc_atomic_fixed2_add(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed2_andb(ident_t *id_ref, int gtid, short *lhs,
short rhs);
void __kmpc_atomic_fixed2_div(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed2u_div(ident_t *id_ref, int gtid, unsigned short *lhs,
unsigned short rhs);
void __kmpc_atomic_fixed2_mul(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed2_orb(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed2_shl(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed2_shr(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed2u_shr(ident_t *id_ref, int gtid, unsigned short *lhs,
unsigned short rhs);
void __kmpc_atomic_fixed2_sub(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed2_xor(ident_t *id_ref, int gtid, short *lhs, short rhs);
// 4-byte add / sub fixed
void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_sub(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
// 4-byte add / sub float
void __kmpc_atomic_float4_add(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real32 rhs);
void __kmpc_atomic_float4_sub(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real32 rhs);
// 8-byte add / sub fixed
void __kmpc_atomic_fixed8_add(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_sub(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
// 8-byte add / sub float
void __kmpc_atomic_float8_add(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float8_sub(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
// 4-byte fixed
void __kmpc_atomic_fixed4_andb(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_div(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4u_div(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
kmp_uint32 rhs);
void __kmpc_atomic_fixed4_mul(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_orb(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_shl(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_shr(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4u_shr(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
kmp_uint32 rhs);
void __kmpc_atomic_fixed4_xor(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
// 8-byte fixed
void __kmpc_atomic_fixed8_andb(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_div(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8u_div(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
kmp_uint64 rhs);
void __kmpc_atomic_fixed8_mul(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_orb(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_shl(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_shr(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8u_shr(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
kmp_uint64 rhs);
void __kmpc_atomic_fixed8_xor(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
// 4-byte float
void __kmpc_atomic_float4_div(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real32 rhs);
void __kmpc_atomic_float4_mul(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real32 rhs);
// 8-byte float
void __kmpc_atomic_float8_div(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float8_mul(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
// 1-, 2-, 4-, 8-byte logical (&&, ||)
void __kmpc_atomic_fixed1_andl(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1_orl(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed2_andl(ident_t *id_ref, int gtid, short *lhs,
short rhs);
void __kmpc_atomic_fixed2_orl(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed4_andl(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_orl(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed8_andl(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_orl(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
// MIN / MAX
void __kmpc_atomic_fixed1_max(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed1_min(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed2_max(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed2_min(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed4_max(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_min(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed8_max(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_min(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_float4_max(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real32 rhs);
void __kmpc_atomic_float4_min(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real32 rhs);
void __kmpc_atomic_float8_max(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float8_min(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_max(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
void __kmpc_atomic_float16_min(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
#if (KMP_ARCH_X86)
// Routines with 16-byte arguments aligned to 16-byte boundary; IA-32
// architecture only
void __kmpc_atomic_float16_max_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
Quad_a16_t rhs);
void __kmpc_atomic_float16_min_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
Quad_a16_t rhs);
#endif
#endif
// .NEQV. (same as xor)
void __kmpc_atomic_fixed1_neqv(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed2_neqv(ident_t *id_ref, int gtid, short *lhs,
short rhs);
void __kmpc_atomic_fixed4_neqv(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed8_neqv(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
// .EQV. (same as ~xor)
void __kmpc_atomic_fixed1_eqv(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed2_eqv(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed4_eqv(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed8_eqv(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
// long double type
void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid, long double *lhs,
long double rhs);
void __kmpc_atomic_float10_sub(ident_t *id_ref, int gtid, long double *lhs,
long double rhs);
void __kmpc_atomic_float10_mul(ident_t *id_ref, int gtid, long double *lhs,
long double rhs);
void __kmpc_atomic_float10_div(ident_t *id_ref, int gtid, long double *lhs,
long double rhs);
// _Quad type
#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_add(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
void __kmpc_atomic_float16_sub(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
void __kmpc_atomic_float16_mul(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
void __kmpc_atomic_float16_div(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
#if (KMP_ARCH_X86)
// Routines with 16-byte arguments aligned to 16-byte boundary
void __kmpc_atomic_float16_add_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
Quad_a16_t rhs);
void __kmpc_atomic_float16_sub_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
Quad_a16_t rhs);
void __kmpc_atomic_float16_mul_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
Quad_a16_t rhs);
void __kmpc_atomic_float16_div_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
Quad_a16_t rhs);
#endif
#endif
// routines for complex types
void __kmpc_atomic_cmplx4_add(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs);
void __kmpc_atomic_cmplx4_sub(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs);
void __kmpc_atomic_cmplx4_mul(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs);
void __kmpc_atomic_cmplx4_div(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs);
void __kmpc_atomic_cmplx8_add(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx8_sub(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx8_mul(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx8_div(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx10_add(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
kmp_cmplx80 rhs);
void __kmpc_atomic_cmplx10_sub(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
kmp_cmplx80 rhs);
void __kmpc_atomic_cmplx10_mul(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
kmp_cmplx80 rhs);
void __kmpc_atomic_cmplx10_div(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
kmp_cmplx80 rhs);
#if KMP_HAVE_QUAD
void __kmpc_atomic_cmplx16_add(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
CPLX128_LEG rhs);
void __kmpc_atomic_cmplx16_sub(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
CPLX128_LEG rhs);
void __kmpc_atomic_cmplx16_mul(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
CPLX128_LEG rhs);
void __kmpc_atomic_cmplx16_div(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
CPLX128_LEG rhs);
#if (KMP_ARCH_X86)
// Routines with 16-byte arguments aligned to 16-byte boundary
void __kmpc_atomic_cmplx16_add_a16(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs);
void __kmpc_atomic_cmplx16_sub_a16(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs);
void __kmpc_atomic_cmplx16_mul_a16(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs);
void __kmpc_atomic_cmplx16_div_a16(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs);
#endif
#endif
#if OMP_40_ENABLED
// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
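// For example, the reverse form covers
//   #pragma omp atomic
//   x = expr - x;   // kmp_int32 x
// which may be lowered to __kmpc_atomic_fixed4_sub_rev(&loc, gtid, &x, expr)
// (illustrative sketch only).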
void __kmpc_atomic_fixed1_sub_rev(ident_t *id_ref, int gtid, char *lhs,
char rhs);
void __kmpc_atomic_fixed1_div_rev(ident_t *id_ref, int gtid, char *lhs,
char rhs);
void __kmpc_atomic_fixed1u_div_rev(ident_t *id_ref, int gtid,
unsigned char *lhs, unsigned char rhs);
void __kmpc_atomic_fixed1_shl_rev(ident_t *id_ref, int gtid, char *lhs,
char rhs);
void __kmpc_atomic_fixed1_shr_rev(ident_t *id_ref, int gtid, char *lhs,
char rhs);
void __kmpc_atomic_fixed1u_shr_rev(ident_t *id_ref, int gtid,
unsigned char *lhs, unsigned char rhs);
void __kmpc_atomic_fixed2_sub_rev(ident_t *id_ref, int gtid, short *lhs,
short rhs);
void __kmpc_atomic_fixed2_div_rev(ident_t *id_ref, int gtid, short *lhs,
short rhs);
void __kmpc_atomic_fixed2u_div_rev(ident_t *id_ref, int gtid,
unsigned short *lhs, unsigned short rhs);
void __kmpc_atomic_fixed2_shl_rev(ident_t *id_ref, int gtid, short *lhs,
short rhs);
void __kmpc_atomic_fixed2_shr_rev(ident_t *id_ref, int gtid, short *lhs,
short rhs);
void __kmpc_atomic_fixed2u_shr_rev(ident_t *id_ref, int gtid,
unsigned short *lhs, unsigned short rhs);
void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_div_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4u_div_rev(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
kmp_uint32 rhs);
void __kmpc_atomic_fixed4_shl_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4_shr_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed4u_shr_rev(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
kmp_uint32 rhs);
void __kmpc_atomic_fixed8_sub_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_div_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8u_div_rev(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
kmp_uint64 rhs);
void __kmpc_atomic_fixed8_shl_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8_shr_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_fixed8u_shr_rev(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
kmp_uint64 rhs);
void __kmpc_atomic_float4_sub_rev(ident_t *id_ref, int gtid, float *lhs,
float rhs);
void __kmpc_atomic_float4_div_rev(ident_t *id_ref, int gtid, float *lhs,
float rhs);
void __kmpc_atomic_float8_sub_rev(ident_t *id_ref, int gtid, double *lhs,
double rhs);
void __kmpc_atomic_float8_div_rev(ident_t *id_ref, int gtid, double *lhs,
double rhs);
void __kmpc_atomic_float10_sub_rev(ident_t *id_ref, int gtid, long double *lhs,
long double rhs);
void __kmpc_atomic_float10_div_rev(ident_t *id_ref, int gtid, long double *lhs,
long double rhs);
#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_sub_rev(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
void __kmpc_atomic_float16_div_rev(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
#endif
void __kmpc_atomic_cmplx4_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs);
void __kmpc_atomic_cmplx4_div_rev(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs);
void __kmpc_atomic_cmplx8_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx8_div_rev(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx10_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
kmp_cmplx80 rhs);
void __kmpc_atomic_cmplx10_div_rev(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
kmp_cmplx80 rhs);
#if KMP_HAVE_QUAD
void __kmpc_atomic_cmplx16_sub_rev(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
CPLX128_LEG rhs);
void __kmpc_atomic_cmplx16_div_rev(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
CPLX128_LEG rhs);
#if (KMP_ARCH_X86)
// Routines with 16-byte arguments aligned to 16-byte boundary
void __kmpc_atomic_float16_sub_a16_rev(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs);
void __kmpc_atomic_float16_div_a16_rev(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs);
void __kmpc_atomic_cmplx16_sub_a16_rev(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs);
void __kmpc_atomic_cmplx16_div_a16_rev(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs);
#endif
#endif // KMP_HAVE_QUAD
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#endif // OMP_40_ENABLED
// routines for mixed types
// RHS=float8
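// Mixed-type forms cover updates whose RHS has a different type, e.g.
//   #pragma omp atomic
//   i *= d;   // char i; double d
// which may be lowered to __kmpc_atomic_fixed1_mul_float8(&loc, gtid, &i, d)
// (illustrative sketch only).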
void __kmpc_atomic_fixed1_mul_float8(ident_t *id_ref, int gtid, char *lhs,
kmp_real64 rhs);
void __kmpc_atomic_fixed1_div_float8(ident_t *id_ref, int gtid, char *lhs,
kmp_real64 rhs);
void __kmpc_atomic_fixed2_mul_float8(ident_t *id_ref, int gtid, short *lhs,
kmp_real64 rhs);
void __kmpc_atomic_fixed2_div_float8(ident_t *id_ref, int gtid, short *lhs,
kmp_real64 rhs);
void __kmpc_atomic_fixed4_mul_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_fixed4_div_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_fixed8_mul_float8(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_fixed8_div_float8(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float4_add_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float4_sub_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float4_mul_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float4_div_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real64 rhs);
// RHS=float16 (deprecated, to be removed when we are sure the compiler does not
// use them)
#if KMP_HAVE_QUAD
void __kmpc_atomic_fixed1_add_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1u_add_fp(ident_t *id_ref, int gtid, unsigned char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1_sub_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1u_sub_fp(ident_t *id_ref, int gtid, unsigned char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1_mul_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1u_mul_fp(ident_t *id_ref, int gtid, unsigned char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1_div_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1u_div_fp(ident_t *id_ref, int gtid, unsigned char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed2_add_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs);
void __kmpc_atomic_fixed2u_add_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs);
void __kmpc_atomic_fixed2_sub_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs);
void __kmpc_atomic_fixed2u_sub_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs);
void __kmpc_atomic_fixed2_mul_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs);
void __kmpc_atomic_fixed2u_mul_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs);
void __kmpc_atomic_fixed2_div_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs);
void __kmpc_atomic_fixed2u_div_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs);
void __kmpc_atomic_fixed4_add_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4u_add_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4_sub_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4u_sub_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4_mul_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4u_mul_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4_div_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4u_div_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8_add_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8u_add_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8_sub_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8u_sub_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8_mul_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8u_mul_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8_div_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8u_div_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
_Quad rhs);
void __kmpc_atomic_float4_add_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs,
_Quad rhs);
void __kmpc_atomic_float4_sub_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs,
_Quad rhs);
void __kmpc_atomic_float4_mul_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs,
_Quad rhs);
void __kmpc_atomic_float4_div_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs,
_Quad rhs);
void __kmpc_atomic_float8_add_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs,
_Quad rhs);
void __kmpc_atomic_float8_sub_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs,
_Quad rhs);
void __kmpc_atomic_float8_mul_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs,
_Quad rhs);
void __kmpc_atomic_float8_div_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs,
_Quad rhs);
void __kmpc_atomic_float10_add_fp(ident_t *id_ref, int gtid, long double *lhs,
_Quad rhs);
void __kmpc_atomic_float10_sub_fp(ident_t *id_ref, int gtid, long double *lhs,
_Quad rhs);
void __kmpc_atomic_float10_mul_fp(ident_t *id_ref, int gtid, long double *lhs,
_Quad rhs);
void __kmpc_atomic_float10_div_fp(ident_t *id_ref, int gtid, long double *lhs,
_Quad rhs);
// Reverse operations
void __kmpc_atomic_fixed1_sub_rev_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1u_sub_rev_fp(ident_t *id_ref, int gtid,
unsigned char *lhs, _Quad rhs);
void __kmpc_atomic_fixed1_div_rev_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs);
void __kmpc_atomic_fixed1u_div_rev_fp(ident_t *id_ref, int gtid,
unsigned char *lhs, _Quad rhs);
void __kmpc_atomic_fixed2_sub_rev_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs);
void __kmpc_atomic_fixed2u_sub_rev_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs);
void __kmpc_atomic_fixed2_div_rev_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs);
void __kmpc_atomic_fixed2u_div_rev_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs);
void __kmpc_atomic_fixed4_sub_rev_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4u_sub_rev_fp(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, _Quad rhs);
void __kmpc_atomic_fixed4_div_rev_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed4u_div_rev_fp(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, _Quad rhs);
void __kmpc_atomic_fixed8_sub_rev_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8u_sub_rev_fp(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, _Quad rhs);
void __kmpc_atomic_fixed8_div_rev_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
_Quad rhs);
void __kmpc_atomic_fixed8u_div_rev_fp(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, _Quad rhs);
void __kmpc_atomic_float4_sub_rev_fp(ident_t *id_ref, int gtid, float *lhs,
_Quad rhs);
void __kmpc_atomic_float4_div_rev_fp(ident_t *id_ref, int gtid, float *lhs,
_Quad rhs);
void __kmpc_atomic_float8_sub_rev_fp(ident_t *id_ref, int gtid, double *lhs,
_Quad rhs);
void __kmpc_atomic_float8_div_rev_fp(ident_t *id_ref, int gtid, double *lhs,
_Quad rhs);
void __kmpc_atomic_float10_sub_rev_fp(ident_t *id_ref, int gtid,
long double *lhs, _Quad rhs);
void __kmpc_atomic_float10_div_rev_fp(ident_t *id_ref, int gtid,
long double *lhs, _Quad rhs);
#endif // KMP_HAVE_QUAD
// RHS=cmplx8
void __kmpc_atomic_cmplx4_add_cmplx8(ident_t *id_ref, int gtid,
kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx4_sub_cmplx8(ident_t *id_ref, int gtid,
kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx4_mul_cmplx8(ident_t *id_ref, int gtid,
kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx4_div_cmplx8(ident_t *id_ref, int gtid,
kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
// generic atomic routines
void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
void (*f)(void *, void *, void *));
void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
void (*f)(void *, void *, void *));
void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
void (*f)(void *, void *, void *));
void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
void (*f)(void *, void *, void *));
void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
void (*f)(void *, void *, void *));
void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
void (*f)(void *, void *, void *));
void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
void (*f)(void *, void *, void *));
void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
void (*f)(void *, void *, void *));
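/* The generic forms take the operation as a callback; a hypothetical sketch:
     static void my_add(void *out, void *a, void *b) {
       *(kmp_int32 *)out = *(kmp_int32 *)a + *(kmp_int32 *)b;
     }
     __kmpc_atomic_4(&loc, gtid, &x, &y, my_add); // x = x + y under a lock
   (my_add is a made-up callback; the numeric suffix is the operand size in
   bytes). */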
// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Below routines for atomic READ are listed
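// e.g. #pragma omp atomic read
//        v = x;   // kmp_int64 x
// may be lowered to v = __kmpc_atomic_fixed8_rd(&loc, gtid, &x)
// (illustrative sketch only).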
char __kmpc_atomic_fixed1_rd(ident_t *id_ref, int gtid, char *loc);
short __kmpc_atomic_fixed2_rd(ident_t *id_ref, int gtid, short *loc);
kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid, kmp_int32 *loc);
kmp_int64 __kmpc_atomic_fixed8_rd(ident_t *id_ref, int gtid, kmp_int64 *loc);
kmp_real32 __kmpc_atomic_float4_rd(ident_t *id_ref, int gtid, kmp_real32 *loc);
kmp_real64 __kmpc_atomic_float8_rd(ident_t *id_ref, int gtid, kmp_real64 *loc);
long double __kmpc_atomic_float10_rd(ident_t *id_ref, int gtid,
long double *loc);
#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_rd(ident_t *id_ref, int gtid,
QUAD_LEGACY *loc);
#endif
// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value
// will be returned through an additional parameter
#if (KMP_OS_WINDOWS)
void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
kmp_cmplx32 *loc);
#else
kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid,
kmp_cmplx32 *loc);
#endif
kmp_cmplx64 __kmpc_atomic_cmplx8_rd(ident_t *id_ref, int gtid,
kmp_cmplx64 *loc);
kmp_cmplx80 __kmpc_atomic_cmplx10_rd(ident_t *id_ref, int gtid,
kmp_cmplx80 *loc);
#if KMP_HAVE_QUAD
CPLX128_LEG __kmpc_atomic_cmplx16_rd(ident_t *id_ref, int gtid,
CPLX128_LEG *loc);
#if (KMP_ARCH_X86)
// Routines with 16-byte arguments aligned to 16-byte boundary
Quad_a16_t __kmpc_atomic_float16_a16_rd(ident_t *id_ref, int gtid,
Quad_a16_t *loc);
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *loc);
#endif
#endif
// Below routines for atomic WRITE are listed
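// e.g. #pragma omp atomic write
//        x = expr;   // kmp_real64 x
// may be lowered to __kmpc_atomic_float8_wr(&loc, gtid, &x, expr)
// (illustrative sketch only).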
void __kmpc_atomic_fixed1_wr(ident_t *id_ref, int gtid, char *lhs, char rhs);
void __kmpc_atomic_fixed2_wr(ident_t *id_ref, int gtid, short *lhs, short rhs);
void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
void __kmpc_atomic_fixed8_wr(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
void __kmpc_atomic_float4_wr(ident_t *id_ref, int gtid, kmp_real32 *lhs,
kmp_real32 rhs);
void __kmpc_atomic_float8_wr(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float10_wr(ident_t *id_ref, int gtid, long double *lhs,
long double rhs);
#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_wr(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
#endif
void __kmpc_atomic_cmplx4_wr(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs);
void __kmpc_atomic_cmplx8_wr(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
kmp_cmplx64 rhs);
void __kmpc_atomic_cmplx10_wr(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
kmp_cmplx80 rhs);
#if KMP_HAVE_QUAD
void __kmpc_atomic_cmplx16_wr(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
CPLX128_LEG rhs);
#if (KMP_ARCH_X86)
// Routines with 16-byte arguments aligned to 16-byte boundary
void __kmpc_atomic_float16_a16_wr(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
Quad_a16_t rhs);
void __kmpc_atomic_cmplx16_a16_wr(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs);
#endif
#endif
// Below routines for atomic CAPTURE are listed
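// The trailing int flag selects which value is captured (a sketch of the
// convention: nonzero returns the updated value, zero the value before the
// update), e.g.
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, y, 1); // v = x += y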
// 1-byte
char __kmpc_atomic_fixed1_add_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_andb_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_div_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
unsigned char __kmpc_atomic_fixed1u_div_cpt(ident_t *id_ref, int gtid,
unsigned char *lhs,
unsigned char rhs, int flag);
char __kmpc_atomic_fixed1_mul_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_orb_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_shl_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_shr_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
unsigned char __kmpc_atomic_fixed1u_shr_cpt(ident_t *id_ref, int gtid,
unsigned char *lhs,
unsigned char rhs, int flag);
char __kmpc_atomic_fixed1_sub_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_xor_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
// 2-byte
short __kmpc_atomic_fixed2_add_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_andb_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_div_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
unsigned short __kmpc_atomic_fixed2u_div_cpt(ident_t *id_ref, int gtid,
unsigned short *lhs,
unsigned short rhs, int flag);
short __kmpc_atomic_fixed2_mul_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_orb_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_shl_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_shr_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
unsigned short __kmpc_atomic_fixed2u_shr_cpt(ident_t *id_ref, int gtid,
unsigned short *lhs,
unsigned short rhs, int flag);
short __kmpc_atomic_fixed2_sub_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_xor_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
// 4-byte add / sub fixed
kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_sub_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
// 4-byte add / sub float
kmp_real32 __kmpc_atomic_float4_add_cpt(ident_t *id_ref, int gtid,
kmp_real32 *lhs, kmp_real32 rhs,
int flag);
kmp_real32 __kmpc_atomic_float4_sub_cpt(ident_t *id_ref, int gtid,
kmp_real32 *lhs, kmp_real32 rhs,
int flag);
// 8-byte add / sub fixed
kmp_int64 __kmpc_atomic_fixed8_add_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_sub_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
// 8-byte add / sub float
kmp_real64 __kmpc_atomic_float8_add_cpt(ident_t *id_ref, int gtid,
kmp_real64 *lhs, kmp_real64 rhs,
int flag);
kmp_real64 __kmpc_atomic_float8_sub_cpt(ident_t *id_ref, int gtid,
kmp_real64 *lhs, kmp_real64 rhs,
int flag);
// 4-byte fixed
kmp_int32 __kmpc_atomic_fixed4_andb_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_div_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_uint32 __kmpc_atomic_fixed4u_div_cpt(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, kmp_uint32 rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_mul_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_orb_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_shl_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_shr_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, kmp_uint32 rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_xor_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
// 8-byte fixed
kmp_int64 __kmpc_atomic_fixed8_andb_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_div_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
kmp_uint64 __kmpc_atomic_fixed8u_div_cpt(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, kmp_uint64 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_mul_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_orb_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_shl_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_shr_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, kmp_uint64 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_xor_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
// 4-byte float
kmp_real32 __kmpc_atomic_float4_div_cpt(ident_t *id_ref, int gtid,
kmp_real32 *lhs, kmp_real32 rhs,
int flag);
kmp_real32 __kmpc_atomic_float4_mul_cpt(ident_t *id_ref, int gtid,
kmp_real32 *lhs, kmp_real32 rhs,
int flag);
// 8-byte float
kmp_real64 __kmpc_atomic_float8_div_cpt(ident_t *id_ref, int gtid,
kmp_real64 *lhs, kmp_real64 rhs,
int flag);
kmp_real64 __kmpc_atomic_float8_mul_cpt(ident_t *id_ref, int gtid,
kmp_real64 *lhs, kmp_real64 rhs,
int flag);
// 1-, 2-, 4-, 8-byte logical (&&, ||)
char __kmpc_atomic_fixed1_andl_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_orl_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
short __kmpc_atomic_fixed2_andl_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_orl_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_andl_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_orl_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_andl_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_orl_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
// MIN / MAX
char __kmpc_atomic_fixed1_max_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_min_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
short __kmpc_atomic_fixed2_max_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_min_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_max_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_min_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_max_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_min_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
kmp_real32 __kmpc_atomic_float4_max_cpt(ident_t *id_ref, int gtid,
kmp_real32 *lhs, kmp_real32 rhs,
int flag);
kmp_real32 __kmpc_atomic_float4_min_cpt(ident_t *id_ref, int gtid,
kmp_real32 *lhs, kmp_real32 rhs,
int flag);
kmp_real64 __kmpc_atomic_float8_max_cpt(ident_t *id_ref, int gtid,
kmp_real64 *lhs, kmp_real64 rhs,
int flag);
kmp_real64 __kmpc_atomic_float8_min_cpt(ident_t *id_ref, int gtid,
kmp_real64 *lhs, kmp_real64 rhs,
int flag);
#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_max_cpt(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
int flag);
QUAD_LEGACY __kmpc_atomic_float16_min_cpt(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
int flag);
#endif
// .NEQV. (same as xor)
char __kmpc_atomic_fixed1_neqv_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
short __kmpc_atomic_fixed2_neqv_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_neqv_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_neqv_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs,
int flag);
// .EQV. (same as ~xor)
char __kmpc_atomic_fixed1_eqv_cpt(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
short __kmpc_atomic_fixed2_eqv_cpt(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_eqv_cpt(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_eqv_cpt(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs, int flag);
// long double type
long double __kmpc_atomic_float10_add_cpt(ident_t *id_ref, int gtid,
long double *lhs, long double rhs,
int flag);
long double __kmpc_atomic_float10_sub_cpt(ident_t *id_ref, int gtid,
long double *lhs, long double rhs,
int flag);
long double __kmpc_atomic_float10_mul_cpt(ident_t *id_ref, int gtid,
long double *lhs, long double rhs,
int flag);
long double __kmpc_atomic_float10_div_cpt(ident_t *id_ref, int gtid,
long double *lhs, long double rhs,
int flag);
#if KMP_HAVE_QUAD
// _Quad type
QUAD_LEGACY __kmpc_atomic_float16_add_cpt(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
int flag);
QUAD_LEGACY __kmpc_atomic_float16_sub_cpt(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
int flag);
QUAD_LEGACY __kmpc_atomic_float16_mul_cpt(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
int flag);
QUAD_LEGACY __kmpc_atomic_float16_div_cpt(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
int flag);
#endif
// routines for complex types
// Workaround for cmplx4 routines - return void; captured value is returned via
// the argument
void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
void __kmpc_atomic_cmplx4_sub_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
void __kmpc_atomic_cmplx4_mul_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
void __kmpc_atomic_cmplx4_div_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt(ident_t *id_ref, int gtid,
kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
int flag);
kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt(ident_t *id_ref, int gtid,
kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
int flag);
kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt(ident_t *id_ref, int gtid,
kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
int flag);
kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt(ident_t *id_ref, int gtid,
kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
int flag);
kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt(ident_t *id_ref, int gtid,
kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
int flag);
kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt(ident_t *id_ref, int gtid,
kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
int flag);
kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt(ident_t *id_ref, int gtid,
kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
int flag);
kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt(ident_t *id_ref, int gtid,
kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
int flag);
#if KMP_HAVE_QUAD
CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt(ident_t *id_ref, int gtid,
CPLX128_LEG *lhs, CPLX128_LEG rhs,
int flag);
CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt(ident_t *id_ref, int gtid,
CPLX128_LEG *lhs, CPLX128_LEG rhs,
int flag);
CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt(ident_t *id_ref, int gtid,
CPLX128_LEG *lhs, CPLX128_LEG rhs,
int flag);
CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt(ident_t *id_ref, int gtid,
CPLX128_LEG *lhs, CPLX128_LEG rhs,
int flag);
#if (KMP_ARCH_X86)
// Routines with 16-byte arguments aligned to 16-byte boundary
Quad_a16_t __kmpc_atomic_float16_add_a16_cpt(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs,
int flag);
Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs,
int flag);
Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs,
int flag);
Quad_a16_t __kmpc_atomic_float16_div_a16_cpt(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs,
int flag);
Quad_a16_t __kmpc_atomic_float16_max_a16_cpt(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs,
int flag);
Quad_a16_t __kmpc_atomic_float16_min_a16_cpt(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs,
int flag);
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs,
int flag);
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs,
int flag);
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs,
int flag);
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs,
int flag);
#endif
#endif
void __kmpc_atomic_start(void);
void __kmpc_atomic_end(void);
#if OMP_40_ENABLED
// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
// binop x; v = x; } for non-commutative operations.
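// e.g. { v = x; x = expr / x; } for kmp_int32 may be lowered to
//   v = __kmpc_atomic_fixed4_div_cpt_rev(&loc, gtid, &x, expr, 0);
// (illustrative; the flag selects old vs. new value as above).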
char __kmpc_atomic_fixed1_sub_cpt_rev(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_div_cpt_rev(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
unsigned char __kmpc_atomic_fixed1u_div_cpt_rev(ident_t *id_ref, int gtid,
unsigned char *lhs,
unsigned char rhs, int flag);
char __kmpc_atomic_fixed1_shl_cpt_rev(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
char __kmpc_atomic_fixed1_shr_cpt_rev(ident_t *id_ref, int gtid, char *lhs,
char rhs, int flag);
unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev(ident_t *id_ref, int gtid,
unsigned char *lhs,
unsigned char rhs, int flag);
short __kmpc_atomic_fixed2_sub_cpt_rev(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_div_cpt_rev(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
unsigned short __kmpc_atomic_fixed2u_div_cpt_rev(ident_t *id_ref, int gtid,
unsigned short *lhs,
unsigned short rhs, int flag);
short __kmpc_atomic_fixed2_shl_cpt_rev(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
short __kmpc_atomic_fixed2_shr_cpt_rev(ident_t *id_ref, int gtid, short *lhs,
short rhs, int flag);
unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev(ident_t *id_ref, int gtid,
unsigned short *lhs,
unsigned short rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs,
int flag);
kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, kmp_uint32 rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev(ident_t *id_ref, int gtid,
kmp_int32 *lhs, kmp_int32 rhs,
int flag);
kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, kmp_uint32 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs,
int flag);
kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, kmp_uint64 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev(ident_t *id_ref, int gtid,
kmp_int64 *lhs, kmp_int64 rhs,
int flag);
kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, kmp_uint64 rhs,
int flag);
float __kmpc_atomic_float4_sub_cpt_rev(ident_t *id_ref, int gtid, float *lhs,
float rhs, int flag);
float __kmpc_atomic_float4_div_cpt_rev(ident_t *id_ref, int gtid, float *lhs,
float rhs, int flag);
double __kmpc_atomic_float8_sub_cpt_rev(ident_t *id_ref, int gtid, double *lhs,
double rhs, int flag);
double __kmpc_atomic_float8_div_cpt_rev(ident_t *id_ref, int gtid, double *lhs,
double rhs, int flag);
long double __kmpc_atomic_float10_sub_cpt_rev(ident_t *id_ref, int gtid,
long double *lhs, long double rhs,
int flag);
long double __kmpc_atomic_float10_div_cpt_rev(ident_t *id_ref, int gtid,
long double *lhs, long double rhs,
int flag);
#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
int flag);
QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
int flag);
#endif
// Workaround for cmplx4 routines - return void; captured value is returned via
// the argument
void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
kmp_cmplx32 *out, int flag);
void __kmpc_atomic_cmplx4_div_cpt_rev(ident_t *id_ref, int gtid,
kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
kmp_cmplx32 *out, int flag);
kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev(ident_t *id_ref, int gtid,
kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
int flag);
kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev(ident_t *id_ref, int gtid,
kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
int flag);
kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev(ident_t *id_ref, int gtid,
kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
int flag);
kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev(ident_t *id_ref, int gtid,
kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
int flag);
#if KMP_HAVE_QUAD
CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev(ident_t *id_ref, int gtid,
CPLX128_LEG *lhs, CPLX128_LEG rhs,
int flag);
CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev(ident_t *id_ref, int gtid,
CPLX128_LEG *lhs, CPLX128_LEG rhs,
int flag);
#if (KMP_ARCH_X86)
Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev(ident_t *id_ref, int gtid,
Quad_a16_t *lhs,
Quad_a16_t rhs, int flag);
Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev(ident_t *id_ref, int gtid,
Quad_a16_t *lhs,
Quad_a16_t rhs, int flag);
kmp_cmplx128_a16_t
__kmpc_atomic_cmplx16_sub_a16_cpt_rev(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs, int flag);
kmp_cmplx128_a16_t
__kmpc_atomic_cmplx16_div_a16_cpt_rev(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs, int flag);
#endif
#endif
// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
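// e.g. v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr); atomically stores
// expr into x and returns the value x held before (illustrative sketch).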
char __kmpc_atomic_fixed1_swp(ident_t *id_ref, int gtid, char *lhs, char rhs);
short __kmpc_atomic_fixed2_swp(ident_t *id_ref, int gtid, short *lhs,
short rhs);
kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
kmp_int32 rhs);
kmp_int64 __kmpc_atomic_fixed8_swp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
kmp_int64 rhs);
float __kmpc_atomic_float4_swp(ident_t *id_ref, int gtid, float *lhs,
float rhs);
double __kmpc_atomic_float8_swp(ident_t *id_ref, int gtid, double *lhs,
double rhs);
long double __kmpc_atomic_float10_swp(ident_t *id_ref, int gtid,
long double *lhs, long double rhs);
#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_swp(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs);
#endif
// !!! TODO: check if we need a workaround here
void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
kmp_cmplx32 rhs, kmp_cmplx32 *out);
// kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid,
// kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
kmp_cmplx64 __kmpc_atomic_cmplx8_swp(ident_t *id_ref, int gtid,
kmp_cmplx64 *lhs, kmp_cmplx64 rhs);
kmp_cmplx80 __kmpc_atomic_cmplx10_swp(ident_t *id_ref, int gtid,
kmp_cmplx80 *lhs, kmp_cmplx80 rhs);
#if KMP_HAVE_QUAD
CPLX128_LEG __kmpc_atomic_cmplx16_swp(ident_t *id_ref, int gtid,
CPLX128_LEG *lhs, CPLX128_LEG rhs);
#if (KMP_ARCH_X86)
Quad_a16_t __kmpc_atomic_float16_a16_swp(ident_t *id_ref, int gtid,
Quad_a16_t *lhs, Quad_a16_t rhs);
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp(ident_t *id_ref, int gtid,
kmp_cmplx128_a16_t *lhs,
kmp_cmplx128_a16_t rhs);
#endif
#endif
// Capture routines for mixed types (RHS=float16)
#if KMP_HAVE_QUAD
char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs, int flag);
char __kmpc_atomic_fixed1_sub_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs, int flag);
char __kmpc_atomic_fixed1_mul_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs, int flag);
char __kmpc_atomic_fixed1_div_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs, int flag);
unsigned char __kmpc_atomic_fixed1u_add_cpt_fp(ident_t *id_ref, int gtid,
unsigned char *lhs, _Quad rhs,
int flag);
unsigned char __kmpc_atomic_fixed1u_sub_cpt_fp(ident_t *id_ref, int gtid,
unsigned char *lhs, _Quad rhs,
int flag);
unsigned char __kmpc_atomic_fixed1u_mul_cpt_fp(ident_t *id_ref, int gtid,
unsigned char *lhs, _Quad rhs,
int flag);
unsigned char __kmpc_atomic_fixed1u_div_cpt_fp(ident_t *id_ref, int gtid,
unsigned char *lhs, _Quad rhs,
int flag);
short __kmpc_atomic_fixed2_add_cpt_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs, int flag);
short __kmpc_atomic_fixed2_sub_cpt_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs, int flag);
short __kmpc_atomic_fixed2_mul_cpt_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs, int flag);
short __kmpc_atomic_fixed2_div_cpt_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs, int flag);
unsigned short __kmpc_atomic_fixed2u_add_cpt_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs,
int flag);
unsigned short __kmpc_atomic_fixed2u_sub_cpt_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs,
int flag);
unsigned short __kmpc_atomic_fixed2u_mul_cpt_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs,
int flag);
unsigned short __kmpc_atomic_fixed2u_div_cpt_fp(ident_t *id_ref, int gtid,
unsigned short *lhs, _Quad rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid,
kmp_int32 *lhs, _Quad rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_sub_cpt_fp(ident_t *id_ref, int gtid,
kmp_int32 *lhs, _Quad rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_mul_cpt_fp(ident_t *id_ref, int gtid,
kmp_int32 *lhs, _Quad rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_div_cpt_fp(ident_t *id_ref, int gtid,
kmp_int32 *lhs, _Quad rhs, int flag);
kmp_uint32 __kmpc_atomic_fixed4u_add_cpt_fp(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, _Quad rhs,
int flag);
kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_fp(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, _Quad rhs,
int flag);
kmp_uint32 __kmpc_atomic_fixed4u_mul_cpt_fp(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, _Quad rhs,
int flag);
kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_fp(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, _Quad rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_add_cpt_fp(ident_t *id_ref, int gtid,
kmp_int64 *lhs, _Quad rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_sub_cpt_fp(ident_t *id_ref, int gtid,
kmp_int64 *lhs, _Quad rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_mul_cpt_fp(ident_t *id_ref, int gtid,
kmp_int64 *lhs, _Quad rhs, int flag);
kmp_int64 __kmpc_atomic_fixed8_div_cpt_fp(ident_t *id_ref, int gtid,
kmp_int64 *lhs, _Quad rhs, int flag);
kmp_uint64 __kmpc_atomic_fixed8u_add_cpt_fp(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, _Quad rhs,
int flag);
kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_fp(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, _Quad rhs,
int flag);
kmp_uint64 __kmpc_atomic_fixed8u_mul_cpt_fp(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, _Quad rhs,
int flag);
kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_fp(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, _Quad rhs,
int flag);
float __kmpc_atomic_float4_add_cpt_fp(ident_t *id_ref, int gtid,
kmp_real32 *lhs, _Quad rhs, int flag);
float __kmpc_atomic_float4_sub_cpt_fp(ident_t *id_ref, int gtid,
kmp_real32 *lhs, _Quad rhs, int flag);
float __kmpc_atomic_float4_mul_cpt_fp(ident_t *id_ref, int gtid,
kmp_real32 *lhs, _Quad rhs, int flag);
float __kmpc_atomic_float4_div_cpt_fp(ident_t *id_ref, int gtid,
kmp_real32 *lhs, _Quad rhs, int flag);
double __kmpc_atomic_float8_add_cpt_fp(ident_t *id_ref, int gtid,
kmp_real64 *lhs, _Quad rhs, int flag);
double __kmpc_atomic_float8_sub_cpt_fp(ident_t *id_ref, int gtid,
kmp_real64 *lhs, _Quad rhs, int flag);
double __kmpc_atomic_float8_mul_cpt_fp(ident_t *id_ref, int gtid,
kmp_real64 *lhs, _Quad rhs, int flag);
double __kmpc_atomic_float8_div_cpt_fp(ident_t *id_ref, int gtid,
kmp_real64 *lhs, _Quad rhs, int flag);
long double __kmpc_atomic_float10_add_cpt_fp(ident_t *id_ref, int gtid,
long double *lhs, _Quad rhs,
int flag);
long double __kmpc_atomic_float10_sub_cpt_fp(ident_t *id_ref, int gtid,
long double *lhs, _Quad rhs,
int flag);
long double __kmpc_atomic_float10_mul_cpt_fp(ident_t *id_ref, int gtid,
long double *lhs, _Quad rhs,
int flag);
long double __kmpc_atomic_float10_div_cpt_fp(ident_t *id_ref, int gtid,
long double *lhs, _Quad rhs,
int flag);
char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs, int flag);
unsigned char __kmpc_atomic_fixed1u_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
unsigned char *lhs,
_Quad rhs, int flag);
char __kmpc_atomic_fixed1_div_cpt_rev_fp(ident_t *id_ref, int gtid, char *lhs,
_Quad rhs, int flag);
unsigned char __kmpc_atomic_fixed1u_div_cpt_rev_fp(ident_t *id_ref, int gtid,
unsigned char *lhs,
_Quad rhs, int flag);
short __kmpc_atomic_fixed2_sub_cpt_rev_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs, int flag);
unsigned short __kmpc_atomic_fixed2u_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
unsigned short *lhs,
_Quad rhs, int flag);
short __kmpc_atomic_fixed2_div_cpt_rev_fp(ident_t *id_ref, int gtid, short *lhs,
_Quad rhs, int flag);
unsigned short __kmpc_atomic_fixed2u_div_cpt_rev_fp(ident_t *id_ref, int gtid,
unsigned short *lhs,
_Quad rhs, int flag);
kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
kmp_int32 *lhs, _Quad rhs,
int flag);
kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, _Quad rhs,
int flag);
kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev_fp(ident_t *id_ref, int gtid,
kmp_int32 *lhs, _Quad rhs,
int flag);
kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev_fp(ident_t *id_ref, int gtid,
kmp_uint32 *lhs, _Quad rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
kmp_int64 *lhs, _Quad rhs,
int flag);
kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, _Quad rhs,
int flag);
kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev_fp(ident_t *id_ref, int gtid,
kmp_int64 *lhs, _Quad rhs,
int flag);
kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev_fp(ident_t *id_ref, int gtid,
kmp_uint64 *lhs, _Quad rhs,
int flag);
float __kmpc_atomic_float4_sub_cpt_rev_fp(ident_t *id_ref, int gtid, float *lhs,
_Quad rhs, int flag);
float __kmpc_atomic_float4_div_cpt_rev_fp(ident_t *id_ref, int gtid, float *lhs,
_Quad rhs, int flag);
double __kmpc_atomic_float8_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
double *lhs, _Quad rhs, int flag);
double __kmpc_atomic_float8_div_cpt_rev_fp(ident_t *id_ref, int gtid,
double *lhs, _Quad rhs, int flag);
long double __kmpc_atomic_float10_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
long double *lhs, _Quad rhs,
int flag);
long double __kmpc_atomic_float10_div_cpt_rev_fp(ident_t *id_ref, int gtid,
long double *lhs, _Quad rhs,
int flag);
#endif // KMP_HAVE_QUAD
// End of OpenMP 4.0 capture
#endif // OMP_40_ENABLED
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
/* ------------------------------------------------------------------------ */
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* KMP_ATOMIC_H */
// end of file
Index: head/contrib/openmp/runtime/src/kmp_csupport.cpp
===================================================================
--- head/contrib/openmp/runtime/src/kmp_csupport.cpp (revision 349792)
+++ head/contrib/openmp/runtime/src/kmp_csupport.cpp (revision 349793)
@@ -1,4164 +1,4173 @@
/*
* kmp_csupport.cpp -- kfront linkage support for OpenMP.
*/
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#define MAX_MESSAGE 512
// flags will be used in future, e.g. to implement openmp_strict library
// restrictions
/*!
* @ingroup STARTUP_SHUTDOWN
* @param loc in source location information
* @param flags in for future use (currently ignored)
*
* Initialize the runtime library. This call is optional; if it is not made then
* it will be implicitly called by attempts to use other library functions.
*/
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
// By default __kmpc_begin() is no-op.
char *env;
if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
__kmp_str_match_true(env)) {
__kmp_middle_initialize();
KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
} else if (__kmp_ignore_mppbeg() == FALSE) {
// By default __kmp_ignore_mppbeg() returns TRUE.
__kmp_internal_begin();
KC_TRACE(10, ("__kmpc_begin: called\n"));
}
}
/*!
* @ingroup STARTUP_SHUTDOWN
* @param loc source location information
*
* Shutdown the runtime library. This is also optional, and even if called will
* not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
* zero.
*/
void __kmpc_end(ident_t *loc) {
// By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
// call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
// environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
// returns FALSE and __kmpc_end() will unregister this root (it can cause
// library shut down).
if (__kmp_ignore_mppend() == FALSE) {
KC_TRACE(10, ("__kmpc_end: called\n"));
KA_TRACE(30, ("__kmpc_end\n"));
__kmp_internal_end_thread(-1);
}
#if KMP_OS_WINDOWS && OMPT_SUPPORT
// Normal exit process on Windows does not allow worker threads of the final
// parallel region to finish reporting their events, so shutting down the
// library here fixes the issue at least for the cases where __kmpc_end() is
// placed properly.
if (ompt_enabled.enabled)
__kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.
This function can be called in any context.
If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).
If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
kmp_int32 gtid = __kmp_entry_gtid();
KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
return gtid;
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime
This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, since the runtime maintains
underlying threads even when they are not active (because the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
KC_TRACE(10,
("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
return TCR_4(__kmp_all_nth);
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
return __kmp_tid_from_gtid(__kmp_entry_gtid());
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
/*!
* @ingroup DEPRECATED
* @param loc location description
*
* This function need not be called. It always returns TRUE.
*/
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG
return TRUE;
#else
const char *semi2;
const char *semi3;
int line_no;
if (__kmp_par_range == 0) {
return TRUE;
}
semi2 = loc->psource;
if (semi2 == NULL) {
return TRUE;
}
semi2 = strchr(semi2, ';');
if (semi2 == NULL) {
return TRUE;
}
semi2 = strchr(semi2 + 1, ';');
if (semi2 == NULL) {
return TRUE;
}
if (__kmp_par_range_filename[0]) {
const char *name = semi2 - 1;
while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
name--;
}
if ((*name == '/') || (*name == ';')) {
name++;
}
if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
return __kmp_par_range < 0;
}
}
semi3 = strchr(semi2 + 1, ';');
if (__kmp_par_range_routine[0]) {
if ((semi3 != NULL) && (semi3 > semi2) &&
(strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
return __kmp_par_range < 0;
}
}
if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
return __kmp_par_range > 0;
}
return __kmp_par_range < 0;
}
return TRUE;
#endif /* KMP_DEBUG */
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
return __kmp_entry_thread()->th.th_root->r.r_active;
}
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct
Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_threads) {
KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
global_tid, num_threads));
__kmp_push_num_threads(loc, global_tid, num_threads);
}
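/* Editor's illustrative sketch (not part of the runtime; loc, gtid and
   outlined_fn are hypothetical compiler-emitted names): for
       #pragma omp parallel num_threads(4)
   the compiler typically pushes the request immediately before the fork:
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_threads(&loc, gtid, 4);             // num_threads(4)
       __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn); // no shared args
   The pushed value applies only to the next fork performed by this thread. */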
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
/* the num_threads are automatically popped */
}
#if OMP_40_ENABLED
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
kmp_int32 proc_bind) {
KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
proc_bind));
__kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
#endif /* OMP_40_ENABLED */
/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global
Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
int gtid = __kmp_entry_gtid();
#if (KMP_STATS_ENABLED)
// If we were in a serial region, then stop the serial timer, record
// the event, and start parallel region timer
stats_state_e previous_state = KMP_GET_THREAD_STATE();
if (previous_state == stats_state_e::SERIAL_REGION) {
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
} else {
KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
}
int inParallel = __kmpc_in_parallel(loc);
if (inParallel) {
KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
} else {
KMP_COUNT_BLOCK(OMP_PARALLEL);
}
#endif
// maybe saving thr_state is enough here
{
va_list ap;
va_start(ap, microtask);
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
kmp_info_t *master_th = __kmp_threads[gtid];
kmp_team_t *parent_team = master_th->th.th_team;
ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
if (lwt)
ompt_frame = &(lwt->ompt_task_info.frame);
else {
int tid = __kmp_tid_from_gtid(gtid);
ompt_frame = &(
parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
}
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
#if INCLUDE_SSC_MARKS
SSC_MARK_FORKING();
#endif
__kmp_fork_call(loc, gtid, fork_context_intel, argc,
VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
&ap
#else
ap
#endif
);
#if INCLUDE_SSC_MARKS
SSC_MARK_JOINING();
#endif
__kmp_join_call(loc, gtid
#if OMPT_SUPPORT
,
fork_context_intel
#endif
);
va_end(ap);
}
#if KMP_STATS_ENABLED
if (previous_state == stats_state_e::SERIAL_REGION) {
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
} else {
KMP_POP_PARTITIONED_TIMER();
}
#endif // KMP_STATS_ENABLED
}
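/* Editor's illustrative sketch (hypothetical names, not taken from the
   runtime): for
       #pragma omp parallel shared(a)
       { a[omp_get_thread_num()] += 1; }
   the compiler conceptually outlines the body into a microtask whose first
   two parameters are the global and bound thread ids, with one trailing
   pointer per shared variable, and forwards it here:
       void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *a) {
         a[omp_get_thread_num()] += 1; // executed by every team thread
       }
       ...
       __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, a); // argc == 1
*/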
#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct
Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_teams, kmp_int32 num_threads) {
KA_TRACE(20,
("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
global_tid, num_teams, num_threads));
__kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global
Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
...) {
int gtid = __kmp_entry_gtid();
kmp_info_t *this_thr = __kmp_threads[gtid];
va_list ap;
va_start(ap, microtask);
KMP_COUNT_BLOCK(OMP_TEAMS);
// remember teams entry point and nesting level
this_thr->th.th_teams_microtask = microtask;
this_thr->th.th_teams_level =
this_thr->th.th_team->t.t_level; // AC: can be >0 on host
#if OMPT_SUPPORT
kmp_team_t *parent_team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(gtid);
if (ompt_enabled.enabled) {
parent_team->t.t_implicit_task_taskdata[tid]
.ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
// check if __kmpc_push_num_teams was called; if it was not, set the default
// number of teams
if (this_thr->th.th_teams_size.nteams == 0) {
__kmp_push_num_teams(loc, gtid, 0, 0);
}
KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
__kmp_fork_call(loc, gtid, fork_context_intel, argc,
VOLATILE_CAST(microtask_t)
__kmp_teams_master, // "wrapped" task
VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
&ap
#else
ap
#endif
);
__kmp_join_call(loc, gtid
#if OMPT_SUPPORT
,
fork_context_intel
#endif
);
this_thr->th.th_teams_microtask = NULL;
this_thr->th.th_teams_level = 0;
*(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
va_end(ap);
}
#endif /* OMP_40_ENABLED */
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
__kmp_serialized_parallel(loc, global_tid);
}
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
kmp_internal_control_t *top;
kmp_info_t *this_thr;
kmp_team_t *serial_team;
KC_TRACE(10,
("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
/* skip all this code for autopar serialized loops since it results in
unacceptable overhead */
if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
return;
// Not autopar code
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
this_thr = __kmp_threads[global_tid];
serial_team = this_thr->th.th_serial_team;
#if OMP_45_ENABLED
kmp_task_team_t *task_team = this_thr->th.th_task_team;
// we need to wait for the proxy tasks before finishing the thread
if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
__kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif
KMP_MB();
KMP_DEBUG_ASSERT(serial_team);
KMP_ASSERT(serial_team->t.t_serialized);
KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
KMP_DEBUG_ASSERT(serial_team->t.t_threads);
KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
#if OMPT_SUPPORT
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
}
// clear the task id only after unlinking the task
ompt_data_t *parent_task_data;
__ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
if (ompt_enabled.ompt_callback_parallel_end) {
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
&(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
}
__ompt_lw_taskteam_unlink(this_thr);
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
/* If necessary, pop the internal control stack values and replace the team
* values */
top = serial_team->t.t_control_stack_top;
if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
serial_team->t.t_control_stack_top = top->next;
__kmp_free(top);
}
// if( serial_team -> t.t_serialized > 1 )
serial_team->t.t_level--;
/* pop dispatch buffers stack */
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
{
dispatch_private_info_t *disp_buffer =
serial_team->t.t_dispatch->th_disp_buffer;
serial_team->t.t_dispatch->th_disp_buffer =
serial_team->t.t_dispatch->th_disp_buffer->next;
__kmp_free(disp_buffer);
}
#if OMP_50_ENABLED
this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif
--serial_team->t.t_serialized;
if (serial_team->t.t_serialized == 0) {
/* return to the parallel section */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
__kmp_clear_x87_fpu_status_word();
__kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
__kmp_load_mxcsr(&serial_team->t.t_mxcsr);
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
this_thr->th.th_team = serial_team->t.t_parent;
this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
/* restore values cached in the thread */
this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
this_thr->th.th_team_master =
serial_team->t.t_parent->t.t_threads[0]; /* JPH */
this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
/* TODO the below shouldn't need to be adjusted for serialized teams */
this_thr->th.th_dispatch =
&this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
__kmp_pop_current_task_from_thread(this_thr);
KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
this_thr->th.th_current_task->td_flags.executing = 1;
if (__kmp_tasking_mode != tskm_immediate_exec) {
// Copy the task team from the new child / old parent team to the thread.
this_thr->th.th_task_team =
this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
KA_TRACE(20,
("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
"team %p\n",
global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
}
} else {
if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
"depth of serial team %p to %d\n",
global_tid, serial_team, serial_team->t.t_serialized));
}
}
if (__kmp_env_consistency_check)
__kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
if (ompt_enabled.enabled)
this_thr->th.ompt_thread_info.state =
((this_thr->th.th_team_serialized) ? ompt_state_work_serial
: ompt_state_work_parallel);
#endif
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information.
Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
KC_TRACE(10, ("__kmpc_flush: called\n"));
/* need explicit __mf() here since the library uses volatile instead */
KMP_MB(); /* Flush all pending memory write invalidates. */
#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
// C74404
// This is to address non-temporal store instructions (sfence needed).
// The clflush instruction is also addressed (mfence needed).
// Probably the non-temporal load movntdqa instruction should also be
// addressed.
// mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
if (!__kmp_cpuinfo.initialized) {
__kmp_query_cpuid(&__kmp_cpuinfo);
}
if (!__kmp_cpuinfo.sse2) {
// CPU cannot execute SSE2 instructions.
} else {
#if KMP_COMPILER_ICC
_mm_mfence();
#elif KMP_COMPILER_MSVC
MemoryBarrier();
#else
__sync_synchronize();
#endif // KMP_COMPILER_ICC
}
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here, move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
// The flushing thread needs to yield here; this prevents a
// busy-waiting thread from saturating the pipeline. flush is
// often used in loops like this:
// while (!flag) {
// #pragma omp flush(flag)
// }
// and adding the yield here is good for at least a 10x speedup
// when running >2 threads per core (on the NAS LU benchmark).
__kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_flush) {
ompt_callbacks.ompt_callback(ompt_callback_flush)(
__ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
KMP_COUNT_BLOCK(OMP_BARRIER);
KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
if (__kmp_env_consistency_check) {
if (loc == 0) {
KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
}
__kmp_check_barrier(global_tid, ct_barrier, loc);
}
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
__kmp_threads[global_tid]->th.th_ident = loc;
// TODO: explicit barrier_wait_id:
// this function is called when 'barrier' directive is present or
// implicit barrier at the end of a worksharing construct.
// 1) better to add a per-thread barrier counter to a thread data structure
// 2) set to 0 when a new team is created
// 3) no sync is required
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
int status = 0;
KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
if (KMP_MASTER_GTID(global_tid)) {
KMP_COUNT_BLOCK(OMP_MASTER);
KMP_PUSH_PARTITIONED_TIMER(OMP_master);
status = 1;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (status) {
if (ompt_enabled.ompt_callback_master) {
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(global_tid);
ompt_callbacks.ompt_callback(ompt_callback_master)(
ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
OMPT_GET_RETURN_ADDRESS(0));
}
}
#endif
if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
if (status)
__kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
else
__kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
if (status)
__kmp_push_sync(global_tid, ct_master, loc, NULL);
else
__kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
}
return status;
}
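/* Editor's illustrative sketch (hypothetical lowering): the usual code
   generation for
       #pragma omp master
       { body(); }
   guards the block with the return value and pairs the two calls:
       if (__kmpc_master(&loc, gtid)) {
         body();
         __kmpc_end_master(&loc, gtid); // only the executing thread
       }
   No barrier is implied; the other threads continue immediately. */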
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
if (ompt_enabled.ompt_callback_master) {
int tid = __kmp_tid_from_gtid(global_tid);
ompt_callbacks.ompt_callback(ompt_callback_master)(
ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
if (__kmp_env_consistency_check) {
if (global_tid < 0)
KMP_WARNING(ThreadIdentInvalid);
if (KMP_MASTER_GTID(global_tid))
__kmp_pop_sync(global_tid, ct_master, loc);
}
}
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
int cid = 0;
kmp_info_t *th;
KMP_DEBUG_ASSERT(__kmp_init_serial);
KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
#if USE_ITT_BUILD
__kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */
th = __kmp_threads[gtid];
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_team_t *team;
ompt_wait_id_t lck;
void *codeptr_ra;
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
team = __kmp_team_from_gtid(gtid);
- lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
+ lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
/* OMPT state update */
th->th.ompt_thread_info.wait_id = lck;
th->th.ompt_thread_info.state = ompt_state_wait_ordered;
/* OMPT event callback */
codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
- (ompt_wait_id_t)lck, codeptr_ra);
+ ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
+ codeptr_ra);
}
}
#endif
if (th->th.th_dispatch->th_deo_fcn != 0)
(*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
else
__kmp_parallel_deo(&gtid, &cid, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
/* OMPT state update */
th->th.ompt_thread_info.state = ompt_state_work_parallel;
th->th.ompt_thread_info.wait_id = 0;
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
+ ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
}
}
#endif
#if USE_ITT_BUILD
__kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}
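/* Editor's illustrative sketch (hypothetical lowering): within the body of
   a "#pragma omp for ordered" loop, the ordered region is bracketed so
   iterations enter it in sequence:
       __kmpc_ordered(&loc, gtid);     // blocks until this iteration's turn
       ordered_body();
       __kmpc_end_ordered(&loc, gtid); // lets the next iteration proceed
*/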
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
int cid = 0;
kmp_info_t *th;
KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
#if USE_ITT_BUILD
__kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */
th = __kmp_threads[gtid];
if (th->th.th_dispatch->th_dxo_fcn != 0)
(*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
else
__kmp_parallel_dxo(&gtid, &cid, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_ordered,
- (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
+ (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
+ ->t.t_ordered.dt.t_value,
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
}
#if KMP_USE_DYNAMIC_LOCK
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
kmp_int32 gtid, kmp_indirect_locktag_t tag) {
// Pointer to the allocated indirect lock is written to crit, while indexing
// is ignored.
void *idx;
kmp_indirect_lock_t **lck;
lck = (kmp_indirect_lock_t **)crit;
kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
KMP_SET_I_LOCK_LOCATION(ilk, loc);
KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
KA_TRACE(20,
("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
__kmp_itt_critical_creating(ilk->lock, loc);
#endif
int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
if (status == 0) {
#if USE_ITT_BUILD
__kmp_itt_critical_destroyed(ilk->lock);
#endif
// We don't really need to destroy the unclaimed lock here since it will be
// cleaned up at program exit.
// KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
}
KMP_DEBUG_ASSERT(*lck != NULL);
}
// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
{ \
kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(l); \
KMP_INIT_YIELD(spins); \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
kmp_backoff_t backoff = __kmp_spin_backoff_params; \
while ( \
KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
__kmp_spin_backoff(&backoff); \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
} \
} \
KMP_FSYNC_ACQUIRED(l); \
}
// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
{ \
kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
}
// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
{ KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
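/* Editor's note: the three macros above form the inlined test-and-set fast
   path; a hypothetical caller pairs them the way the regular lock dispatch
   would (lck pointing at kmp_tas_lock_t-compatible storage):
       int rc;
       KMP_TEST_TAS_LOCK(lck, gtid, rc);  // non-blocking attempt
       if (!rc)
         KMP_ACQUIRE_TAS_LOCK(lck, gtid); // spins with backoff and yields
       ... critical work ...
       KMP_RELEASE_TAS_LOCK(lck, gtid);   // single releasing store
*/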
#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
{ \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
kmp_int32 gtid_code = (gtid + 1) << 1; \
KMP_MB(); \
KMP_FSYNC_PREPARE(ftx); \
kmp_int32 poll_val; \
while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
if (!cond) { \
if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
poll_val | \
KMP_LOCK_BUSY(1, futex))) { \
continue; \
} \
poll_val |= KMP_LOCK_BUSY(1, futex); \
} \
kmp_int32 rc; \
if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
NULL, NULL, 0)) != 0) { \
continue; \
} \
gtid_code |= 1; \
} \
KMP_FSYNC_ACQUIRED(ftx); \
}
// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
{ \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
KMP_FSYNC_ACQUIRED(ftx); \
rc = TRUE; \
} else { \
rc = FALSE; \
} \
}
// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
{ \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
KMP_MB(); \
KMP_FSYNC_RELEASING(ftx); \
kmp_int32 poll_val = \
KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
if (KMP_LOCK_STRIP(poll_val) & 1) { \
syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
} \
KMP_MB(); \
KMP_YIELD(TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
}
#endif // KMP_USE_FUTEX
#else // KMP_USE_DYNAMIC_LOCK
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
ident_t const *loc,
kmp_int32 gtid) {
kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
// Because of the double-check, the following load doesn't need to be volatile
kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
if (lck == NULL) {
void *idx;
// Allocate & initialize the lock.
// Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
__kmp_init_user_lock_with_checks(lck);
__kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
__kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed without ever being used, but
// that is not a problem, because this is not a real event seen by the user;
// it merely sets a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */
// Use a cmpxchg instruction to slam the start of the critical section with
// the lock pointer. If another thread beat us to it, deallocate the lock,
// and use the lock that the other thread allocated.
int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
__kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
__kmp_destroy_user_lock_with_checks(lck);
__kmp_user_lock_free(&idx, gtid, lck);
lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
KMP_DEBUG_ASSERT(lck != NULL);
}
}
return lck;
}
#endif // KMP_USE_DYNAMIC_LOCK
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_state_t prev_state = ompt_state_undefined;
ompt_thread_info_t ti;
#endif
kmp_user_lock_p lck;
KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
// TODO: add THR_OVHD_STATE
KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
KMP_CHECK_USER_LOCK_INIT();
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
lck = (kmp_user_lock_p)crit;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
lck = (kmp_user_lock_p)crit;
}
#endif
else { // ticket, queuing or drdpa
lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
}
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_critical, loc, lck);
// Since the critical directive binds to all threads, not just the current
// team, we have to check this even if we are in a serialized team.
// Also, even if we are the uber thread, we still have to acquire the lock,
// as we have to contend with sibling threads.
#if USE_ITT_BUILD
__kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(global_tid);
void *codeptr_ra = NULL;
if (ompt_enabled.enabled) {
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
/* OMPT state update */
prev_state = ti.state;
- ti.wait_id = (ompt_wait_id_t)lck;
+ ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
ti.state = ompt_state_wait_critical;
/* OMPT event callback */
codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)crit, codeptr_ra);
+ (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
}
}
#endif
// Value of 'crit' should be good for using as a critical_id of the critical
// section directive.
__kmp_acquire_user_lock_with_checks(lck, global_tid);
#if USE_ITT_BUILD
__kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
/* OMPT state update */
ti.state = prev_state;
ti.wait_id = 0;
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
+ ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
}
}
#endif
KMP_POP_PARTITIONED_TIMER();
KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
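/* Editor's illustrative sketch (hypothetical lowering): each "critical"
   construct is keyed by a zero-initialized, statically allocated
   kmp_critical_name, which small lock kinds use as the lock storage itself:
       static kmp_critical_name crit_foo = {0}; // one per critical name
       __kmpc_critical(&loc, gtid, &crit_foo);  // blocks until acquired
       body();
       __kmpc_end_critical(&loc, gtid, &crit_foo);
*/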
#if KMP_USE_DYNAMIC_LOCK
// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif
// Hints that do not require further logic
if (hint & kmp_lock_hint_hle)
return KMP_TSX_LOCK(hle);
if (hint & kmp_lock_hint_rtm)
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
if (hint & kmp_lock_hint_adaptive)
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
// Rule out conflicting hints first by returning the default lock
if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
return __kmp_user_lock_seq;
if ((hint & omp_lock_hint_speculative) &&
(hint & omp_lock_hint_nonspeculative))
return __kmp_user_lock_seq;
// Do not even consider speculation when it appears to be contended
if (hint & omp_lock_hint_contended)
return lockseq_queuing;
// Uncontended lock without speculation
if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
return lockseq_tas;
// HLE lock for speculation
if (hint & omp_lock_hint_speculative)
return KMP_TSX_LOCK(hle);
return __kmp_user_lock_seq;
}
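/* Editor's note (consequences of the mapping above, not an external
   contract); a hypothetical caller would observe, for example:
       __kmp_map_hint_to_lock(omp_lock_hint_contended)   == lockseq_queuing
       __kmp_map_hint_to_lock(omp_lock_hint_uncontended) == lockseq_tas
       __kmp_map_hint_to_lock(omp_lock_hint_contended |
                              omp_lock_hint_uncontended) == __kmp_user_lock_seq
   Without KMP_USE_TSX the TSX sequences decay to __kmp_user_lock_seq, and
   the rtm/adaptive hints additionally require RTM-capable hardware. */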
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
if (user_lock) {
switch (KMP_EXTRACT_D_TAG(user_lock)) {
case 0:
break;
#if KMP_USE_FUTEX
case locktag_futex:
return kmp_mutex_impl_queuing;
#endif
case locktag_tas:
return kmp_mutex_impl_spin;
#if KMP_USE_TSX
case locktag_hle:
return kmp_mutex_impl_speculative;
#endif
default:
return kmp_mutex_impl_none;
}
ilock = KMP_LOOKUP_I_LOCK(user_lock);
}
KMP_ASSERT(ilock);
switch (ilock->type) {
#if KMP_USE_TSX
case locktag_adaptive:
case locktag_rtm:
return kmp_mutex_impl_speculative;
#endif
case locktag_nested_tas:
return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
case locktag_nested_futex:
#endif
case locktag_ticket:
case locktag_queuing:
case locktag_drdpa:
case locktag_nested_ticket:
case locktag_nested_queuing:
case locktag_nested_drdpa:
return kmp_mutex_impl_queuing;
default:
return kmp_mutex_impl_none;
}
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
switch (__kmp_user_lock_kind) {
case lk_tas:
return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
case lk_futex:
#endif
case lk_ticket:
case lk_queuing:
case lk_drdpa:
return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
case lk_hle:
case lk_rtm:
case lk_adaptive:
return kmp_mutex_impl_speculative;
#endif
default:
return kmp_mutex_impl_none;
}
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.
Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit, uint32_t hint) {
KMP_COUNT_BLOCK(OMP_CRITICAL);
kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_state_t prev_state = ompt_state_undefined;
ompt_thread_info_t ti;
// This is the case if called from __kmpc_critical:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif
KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
// Check if it is initialized.
KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
if (*lk == 0) {
kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
if (KMP_IS_D_LOCK(lckseq)) {
KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
KMP_GET_D_TAG(lckseq));
} else {
__kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
}
}
// Branch for accessing the actual lock object and set operation. This
// branching is inevitable since this lock initialization does not follow the
// normal dispatch path (lock table is not used).
if (KMP_EXTRACT_D_TAG(lk) != 0) {
lck = (kmp_user_lock_p)lk;
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck,
__kmp_map_hint_to_lock(hint));
}
#if USE_ITT_BUILD
__kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
/* OMPT state update */
prev_state = ti.state;
- ti.wait_id = (ompt_wait_id_t)lck;
+ ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
ti.state = ompt_state_wait_critical;
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, (unsigned int)hint,
- __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
+ __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
+ codeptr);
}
}
#endif
#if KMP_USE_INLINED_TAS
if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
} else
#elif KMP_USE_INLINED_FUTEX
if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
} else
#endif
{
KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
}
} else {
kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
lck = ilk->lock;
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck,
__kmp_map_hint_to_lock(hint));
}
#if USE_ITT_BUILD
__kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
/* OMPT state update */
prev_state = ti.state;
- ti.wait_id = (ompt_wait_id_t)lck;
+ ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
ti.state = ompt_state_wait_critical;
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, (unsigned int)hint,
- __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
+ __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
+ codeptr);
}
}
#endif
KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
}
KMP_POP_PARTITIONED_TIMER();
#if USE_ITT_BUILD
__kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
/* OMPT state update */
ti.state = prev_state;
ti.wait_id = 0;
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
+ ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
#endif
KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
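/* Editor's illustrative sketch (hypothetical lowering): with an OpenMP 4.5
   lock hint, e.g.
       #pragma omp critical (foo) hint(omp_lock_hint_speculative)
   the hint reaches the runtime so the first thread to arrive selects the
   lock implementation:
       static kmp_critical_name crit_foo = {0};
       __kmpc_critical_with_hint(&loc, gtid, &crit_foo,
                                 omp_lock_hint_speculative);
       body();
       __kmpc_end_critical(&loc, gtid, &crit_foo);
*/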
#endif // KMP_USE_DYNAMIC_LOCK
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit) {
kmp_user_lock_p lck;
KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
#if KMP_USE_DYNAMIC_LOCK
if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
lck = (kmp_user_lock_p)crit;
KMP_ASSERT(lck != NULL);
if (__kmp_env_consistency_check) {
__kmp_pop_sync(global_tid, ct_critical, loc);
}
#if USE_ITT_BUILD
__kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
KMP_RELEASE_TAS_LOCK(lck, global_tid);
} else
#elif KMP_USE_INLINED_FUTEX
if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
} else
#endif
{
KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
}
} else {
kmp_indirect_lock_t *ilk =
(kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
KMP_ASSERT(ilk != NULL);
lck = ilk->lock;
if (__kmp_env_consistency_check) {
__kmp_pop_sync(global_tid, ct_critical, loc);
}
#if USE_ITT_BUILD
__kmp_itt_critical_releasing(lck);
#endif
KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
}
#else // KMP_USE_DYNAMIC_LOCK
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
lck = (kmp_user_lock_p)crit;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
lck = (kmp_user_lock_p)crit;
}
#endif
else { // ticket, queuing or drdpa
lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
}
KMP_ASSERT(lck != NULL);
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
#if USE_ITT_BUILD
__kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
// Value of 'crit' should be good for using as a critical_id of the critical
// section directive.
__kmp_release_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
/* OMPT release event triggers after lock is released; place here to trigger
* for all #if branches */
OMPT_STORE_RETURN_ADDRESS(global_tid);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
+ ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
+ OMPT_LOAD_RETURN_ADDRESS(0));
}
#endif
KMP_POP_PARTITIONED_TIMER();
KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise
Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
int status;
KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
if (__kmp_env_consistency_check)
__kmp_check_barrier(global_tid, ct_barrier, loc);
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
return (status != 0) ? 0 : 1;
}
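/* Editor's illustrative sketch (hypothetical lowering): the combined form
   fuses a barrier with the master test:
       if (__kmpc_barrier_master(&loc, gtid)) {
         master_body();
         __kmpc_end_barrier_master(&loc, gtid); // releases waiting threads
       }
   Non-master threads remain in the split barrier until the end call. */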
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
__kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise
Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the "nowait" variant leaves no
threads waiting at the end of the construct for an "end" call to release.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
kmp_int32 ret;
KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
if (__kmp_env_consistency_check) {
if (loc == 0) {
KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
}
__kmp_check_barrier(global_tid, ct_barrier, loc);
}
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
ret = __kmpc_master(loc, global_tid);
if (__kmp_env_consistency_check) {
/* there's no __kmpc_end_master called; so the (stats) */
/* actions of __kmpc_end_master are done here */
if (global_tid < 0) {
KMP_WARNING(ThreadIdentInvalid);
}
if (ret) {
/* only one thread should do the pop since only */
/* one did the push (see __kmpc_master()) */
__kmp_pop_sync(global_tid, ct_master, loc);
}
}
return (ret);
}
/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.
Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if one is required.
*/
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
if (rc) {
// We are going to execute the single statement, so we should count it.
KMP_COUNT_BLOCK(OMP_SINGLE);
KMP_PUSH_PARTITIONED_TIMER(OMP_single);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(global_tid);
if (ompt_enabled.enabled) {
if (rc) {
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_executor, ompt_scope_begin,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
}
} else {
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_other, ompt_scope_begin,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_other, ompt_scope_end,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
}
}
}
#endif
return rc;
}
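/* Editor's illustrative sketch (hypothetical lowering): since neither call
   implies a barrier, the compiler emits one unless "nowait" was specified:
       if (__kmpc_single(&loc, gtid)) {
         body();                        // executed by exactly one thread
         __kmpc_end_single(&loc, gtid); // called only by that thread
       }
       __kmpc_barrier(&loc, gtid);      // omitted for "single nowait"
*/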
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
__kmp_exit_single(global_tid);
KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(global_tid);
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_executor, ompt_scope_end,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id
Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
KMP_POP_PARTITIONED_TIMER();
KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_work_t ompt_work_type = ompt_work_loop;
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
// Determine workshare type
if (loc != NULL) {
if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
ompt_work_type = ompt_work_loop;
} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
ompt_work_type = ompt_work_sections;
} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
ompt_work_type = ompt_work_distribute;
} else {
// use default set above.
// a warning about this case is provided in __kmpc_for_static_init
}
KMP_DEBUG_ASSERT(ompt_work_type);
}
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
if (__kmp_env_consistency_check)
__kmp_pop_workshare(global_tid, ct_pdo, loc);
}
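/* Editor's illustrative sketch (hypothetical lowering; the matching init
   routine lives elsewhere in the runtime): a statically scheduled loop such
   as "#pragma omp for schedule(static)" brackets its body with init/fini:
       kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
       __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last,
                                &lower, &upper, &stride,
                                1 /* incr */, 1 /* chunk, ignored here */);
       for (kmp_int32 i = lower; i <= upper; ++i)
         body(i);
       __kmpc_for_static_fini(&loc, gtid); // pairs with every static init
*/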
// User routines which take C-style arguments (call by value), as distinct
// from the Fortran equivalent routines
void ompc_set_num_threads(int arg) {
// !!!!! TODO: check the per-task binding
__kmp_set_num_threads(arg, __kmp_entry_gtid());
}
void ompc_set_dynamic(int flag) {
kmp_info_t *thread;
/* For the thread-private implementation of the internal controls */
thread = __kmp_entry_thread();
__kmp_save_internal_controls(thread);
set__dynamic(thread, flag ? TRUE : FALSE);
}
void ompc_set_nested(int flag) {
kmp_info_t *thread;
/* For the thread-private internal controls implementation */
thread = __kmp_entry_thread();
__kmp_save_internal_controls(thread);
set__nested(thread, flag ? TRUE : FALSE);
}
void ompc_set_max_active_levels(int max_active_levels) {
/* TO DO */
/* we want per-task implementation of this internal control */
/* For the per-thread internal controls implementation */
__kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}
void ompc_set_schedule(omp_sched_t kind, int modifier) {
// !!!!! TODO: check the per-task binding
__kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}
int ompc_get_ancestor_thread_num(int level) {
return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}
int ompc_get_team_size(int level) {
return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
#if OMP_50_ENABLED
/* OpenMP 5.0 Affinity Format API */
void ompc_set_affinity_format(char const *format) {
if (!__kmp_init_serial) {
__kmp_serial_initialize();
}
__kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
format, KMP_STRLEN(format) + 1);
}
size_t ompc_get_affinity_format(char *buffer, size_t size) {
size_t format_size;
if (!__kmp_init_serial) {
__kmp_serial_initialize();
}
format_size = KMP_STRLEN(__kmp_affinity_format);
if (buffer && size) {
__kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
format_size + 1);
}
return format_size;
}
void ompc_display_affinity(char const *format) {
int gtid;
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
gtid = __kmp_get_gtid();
__kmp_aux_display_affinity(gtid, format);
}
size_t ompc_capture_affinity(char *buffer, size_t buf_size,
char const *format) {
int gtid;
size_t num_required;
kmp_str_buf_t capture_buf;
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
gtid = __kmp_get_gtid();
__kmp_str_buf_init(&capture_buf);
num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
if (buffer && buf_size) {
__kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
capture_buf.used + 1);
}
__kmp_str_buf_free(&capture_buf);
return num_required;
}
#endif /* OMP_50_ENABLED */
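/* Editor's illustrative sketch (hypothetical usage of the OpenMP 5.0
   affinity-format entry points above; the format string is made up):
       ompc_set_affinity_format("T#%{thread_num} on %{thread_affinity}");
       ompc_display_affinity(NULL); // NULL/empty means "use stored format"
       char buf[256];
       size_t need = ompc_capture_affinity(buf, sizeof(buf), NULL);
       // "need" is the size required; compare with sizeof(buf) to detect
       // truncation
*/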
void kmpc_set_stacksize(int arg) {
// __kmp_aux_set_stacksize initializes the library if needed
__kmp_aux_set_stacksize(arg);
}
void kmpc_set_stacksize_s(size_t arg) {
// __kmp_aux_set_stacksize initializes the library if needed
__kmp_aux_set_stacksize(arg);
}
void kmpc_set_blocktime(int arg) {
int gtid, tid;
kmp_info_t *thread;
gtid = __kmp_entry_gtid();
tid = __kmp_tid_from_gtid(gtid);
thread = __kmp_thread_from_gtid(gtid);
__kmp_aux_set_blocktime(arg, thread, tid);
}
void kmpc_set_library(int arg) {
// __kmp_user_set_library initializes the library if needed
__kmp_user_set_library((enum library_type)arg);
}
void kmpc_set_defaults(char const *str) {
// __kmp_aux_set_defaults initializes the library if needed
__kmp_aux_set_defaults(str, KMP_STRLEN(str));
}
void kmpc_set_disp_num_buffers(int arg) {
// ignore after initialization because some teams have already
// allocated dispatch buffers
if (__kmp_init_serial == 0 && arg > 0)
__kmp_dispatch_num_buffers = arg;
}
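/* Usage sketch (an assumption: the kmp_ extension API declared by this
   runtime's <omp.h>, which forwards to the kmpc_ entry points above):
   @code
   #include <omp.h>
   void tune(void) {
     kmp_set_stacksize_s(4 * 1024 * 1024);     // worker stack size, bytes
     kmp_set_blocktime(0);                     // sleep immediately when idle
     kmp_set_defaults("KMP_AFFINITY=compact"); // same syntax as the env var
   }
   @endcode */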
int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
return -1;
#else
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}
int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
return -1;
#else
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}
int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
return -1;
#else
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
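/* Usage sketch for the kmp_ affinity-mask extension these three entry points
   implement (an assumption: declarations come from this runtime's <omp.h>;
   as above, stub and non-affinity builds report failure):
   @code
   #include <omp.h>
   int pin_to_proc(int proc) {
     kmp_affinity_mask_t mask;
     kmp_create_affinity_mask(&mask);
     if (kmp_set_affinity_mask_proc(proc, &mask) != 0)
       return -1;                      // proc not valid for this process
     int rc = kmp_set_affinity(&mask); // 0 on success
     kmp_destroy_affinity_mask(&mask);
     return rc;
   }
   @endcode */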
/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread
__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.
The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.
The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.
Internal implementation: the single thread will first copy its descriptor
address (cpy_data) to a team-private location, then each of the other threads
will call the function pointed to by the parameter cpy_func, which performs
the copy using the cpy_data buffers.
The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.
The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
void *cpy_data, void (*cpy_func)(void *, void *),
kmp_int32 didit) {
void **data_ptr;
KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
KMP_MB();
data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
if (__kmp_env_consistency_check) {
if (loc == 0) {
KMP_WARNING(ConstructIdentInvalid);
}
}
// ToDo: Optimize the following two barriers into some kind of split barrier
if (didit)
*data_ptr = cpy_data;
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
__kmp_threads[gtid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
if (!didit)
(*cpy_func)(cpy_data, *data_ptr);
// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
// tasks can overwrite the location)
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
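/* A sketch (simplified; not literal compiler output) of the calls a compiler
   might emit for
       #pragma omp single copyprivate(x)
   loc and gtid stand for the usual ident_t and global thread id arguments,
   and copy_int is a hypothetical helper:
   @code
   static void copy_int(void *dst, void *src) { *(int *)dst = *(int *)src; }
   // ... inside the parallel region, with int x private to each thread:
   kmp_int32 didit = 0;
   if (__kmpc_single(loc, gtid)) { // nonzero in the single thread only
     x = 42;                       // body of the single region
     didit = 1;
     __kmpc_end_single(loc, gtid);
   }
   __kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_int, didit);
   // afterwards every thread's x holds the single thread's value
   @endcode */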
/* -------------------------------------------------------------------------- */
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
__kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
// TODO: Make check abort messages use location info & pass it into
// with_checks routines
#if KMP_USE_DYNAMIC_LOCK
// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
kmp_dyna_lockseq_t seq) {
if (KMP_IS_D_LOCK(seq)) {
KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
__kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
} else {
KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
__kmp_itt_lock_creating(ilk->lock, loc);
#endif
}
}
// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // There is no nested lock implementation for speculative locks
if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
seq = __kmp_user_lock_seq;
#endif
switch (seq) {
case lockseq_tas:
seq = lockseq_nested_tas;
break;
#if KMP_USE_FUTEX
case lockseq_futex:
seq = lockseq_nested_futex;
break;
#endif
case lockseq_ticket:
seq = lockseq_nested_ticket;
break;
case lockseq_queuing:
seq = lockseq_nested_queuing;
break;
case lockseq_drdpa:
seq = lockseq_nested_drdpa;
break;
default:
seq = lockseq_nested_queuing;
}
KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
__kmp_itt_lock_creating(ilk->lock, loc);
#endif
}
/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
uintptr_t hint) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
}
__kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, (omp_lock_hint_t)hint,
- __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
- codeptr);
+ __ompt_get_mutex_impl_type(user_lock),
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
}
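/* User-level sketch of the OpenMP 4.5 API this entry point backs; the
   omp_lock_hint_* values are standard, but whether a hint is honored is
   platform-dependent (e.g. speculative locks require TSX):
   @code
   #include <omp.h>
   omp_lock_t l;
   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
   omp_set_lock(&l);
   // ... critical work ...
   omp_unset_lock(&l);
   omp_destroy_lock(&l);
   @endcode */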
/* initialize the lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
void **user_lock, uintptr_t hint) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
}
__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
- __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
- codeptr);
+ __ompt_get_mutex_impl_type(user_lock),
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
}
#endif // KMP_USE_DYNAMIC_LOCK
/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_lock");
}
__kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
- codeptr);
+ __ompt_get_mutex_impl_type(user_lock),
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#else // KMP_USE_DYNAMIC_LOCK
static char const *const func = "omp_init_lock";
kmp_user_lock_p lck;
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check) {
if (user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, func);
}
}
KMP_CHECK_USER_LOCK_INIT();
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
}
INIT_LOCK(lck);
__kmp_set_user_lock_location(lck, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)user_lock, codeptr);
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if USE_ITT_BUILD
__kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
/* initialize the lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
}
__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
- codeptr);
+ __ompt_get_mutex_impl_type(user_lock),
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#else // KMP_USE_DYNAMIC_LOCK
static char const *const func = "omp_init_nest_lock";
kmp_user_lock_p lck;
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check) {
if (user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, func);
}
}
KMP_CHECK_USER_LOCK_INIT();
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
}
INIT_NESTED_LOCK(lck);
__kmp_set_user_lock_location(lck, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)user_lock, codeptr);
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if USE_ITT_BUILD
__kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
kmp_user_lock_p lck;
if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
} else {
lck = (kmp_user_lock_p)user_lock;
}
__kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
kmp_user_lock_p lck;
if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
} else {
lck = (kmp_user_lock_p)user_lock;
}
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if USE_ITT_BUILD
__kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
DESTROY_LOCK(lck);
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
;
}
#endif
else {
__kmp_user_lock_free(user_lock, gtid, lck);
}
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock
/* destroy the lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
__kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if USE_ITT_BUILD
__kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
DESTROY_NESTED_LOCK(lck);
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
;
}
#endif
else {
__kmp_user_lock_free(user_lock, gtid, lck);
}
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(
(kmp_user_lock_p)
user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
- codeptr);
+ __ompt_get_mutex_impl_type(user_lock),
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if KMP_USE_INLINED_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
} else
#elif KMP_USE_INLINED_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
} else
#endif
{
__kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)lck, codeptr);
+ (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
ACQUIRE_LOCK(lck, gtid);
#if USE_ITT_BUILD
__kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
- codeptr);
+ __ompt_get_mutex_impl_type(user_lock),
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
}
#endif
int acquire_status =
KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
(void) acquire_status;
#if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
// lock_first
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
+ codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
}
}
#endif
#else // KMP_USE_DYNAMIC_LOCK
int acquire_status;
kmp_user_lock_p lck;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
+ __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
+ codeptr);
}
}
#endif
ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
#if USE_ITT_BUILD
__kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
// lock_first
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
+ ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
}
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}
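/* Sketch of the user-visible nestable-lock semantics implemented above: the
   owning thread may re-acquire the lock, and every set must be matched by an
   unset (recursive walk used purely as an illustration; nl must have been
   initialized with omp_init_nest_lock):
   @code
   #include <omp.h>
   omp_nest_lock_t nl;
   void visit(int depth) {
     omp_set_nest_lock(&nl); // fine even if this thread already holds it
     if (depth > 0)
       visit(depth - 1);
     omp_unset_nest_lock(&nl);
   }
   @endcode */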
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
KMP_RELEASE_TAS_LOCK(user_lock, gtid);
} else
#elif KMP_USE_INLINED_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
} else
#endif
{
__kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
/* Can't use serial interval since not block structured */
/* release the lock */
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
return;
#else
lck = (kmp_user_lock_p)user_lock;
#endif
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */
RELEASE_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}
/* release the lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
int release_status =
KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
(void) release_status;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (release_status == KMP_LOCK_RELEASED) {
if (ompt_enabled.ompt_callback_mutex_released) {
// release_lock_last
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
+ codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
// release_lock_prev
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
}
#endif
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
/* Can't use serial interval since not block structured */
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
int release_status = KMP_LOCK_STILL_HELD;
#endif
if (--(tl->lk.depth_locked) == 0) {
TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
release_status = KMP_LOCK_RELEASED;
#endif
}
KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (release_status == KMP_LOCK_RELEASED) {
if (ompt_enabled.ompt_callback_mutex_released) {
// release_lock_last
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
// release_lock_previous
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
#endif
return;
#else
lck = (kmp_user_lock_p)user_lock;
#endif
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */
int release_status;
release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.enabled) {
if (release_status == KMP_LOCK_RELEASED) {
if (ompt_enabled.ompt_callback_mutex_released) {
// release_lock_last
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
// release_lock_previous
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
KMP_COUNT_BLOCK(OMP_test_lock);
#if KMP_USE_DYNAMIC_LOCK
int rc;
int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
- codeptr);
+ __ompt_get_mutex_impl_type(user_lock),
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
#if KMP_USE_INLINED_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
} else
#elif KMP_USE_INLINED_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
} else
#endif
{
rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
if (rc) {
#if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
return FTN_TRUE;
} else {
#if USE_ITT_BUILD
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
return FTN_FALSE;
}
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
int rc;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)lck, codeptr);
+ (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
if (rc) {
__kmp_itt_lock_acquired(lck);
} else {
__kmp_itt_lock_cancelled(lck);
}
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
#endif
return (rc ? FTN_TRUE : FTN_FALSE);
/* Can't use serial interval since not block structured */
#endif // KMP_USE_DYNAMIC_LOCK
}
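/* User-level sketch of omp_test_lock, which this entry point implements: it
   never blocks and returns nonzero only if the lock was acquired:
   @code
   if (omp_test_lock(&l)) {
     // lock held: do the protected work
     omp_unset_lock(&l);
   } else {
     // lock busy: do other useful work and retry later
   }
   @endcode */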
/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
int rc;
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
- codeptr);
+ __ompt_get_mutex_impl_type(user_lock),
+ (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
#endif
rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
if (rc) {
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
} else {
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
}
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled && rc) {
if (rc == 1) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
// lock_first
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
+ codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
+ ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
}
}
}
#endif
return rc;
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
int rc;
if ((__kmp_user_lock_kind == lk_tas) &&
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#if KMP_USE_FUTEX
else if ((__kmp_user_lock_kind == lk_futex) &&
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
OMP_NEST_LOCK_T_SIZE)) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
}
#if USE_ITT_BUILD
__kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
+ __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
+ codeptr);
}
#endif
rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
if (rc) {
__kmp_itt_lock_acquired(lck);
} else {
__kmp_itt_lock_cancelled(lck);
}
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled && rc) {
if (rc == 1) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
// lock_first
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr);
+ ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
}
}
}
#endif
return rc;
/* Can't use serial interval since not block structured */
#endif // KMP_USE_DYNAMIC_LOCK
}
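/* For the nestable variant the return value is the new nesting count rather
   than a plain boolean, which is why the OMPT code above distinguishes
   rc == 1 (first acquisition) from rc > 1 (re-acquisition). Sketch:
   @code
   int depth = omp_test_nest_lock(&nl); // 0: busy; N > 0: now held N times
   if (depth)
     omp_unset_nest_lock(&nl);
   @endcode */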
// Interface to fast scalable reduce methods routines
// keep the selected method in a thread-local structure for cross-function
// usage: it will be used in the __kmpc_end_reduce* functions;
// an alternative would be to re-determine the method in the
// __kmpc_end_reduce* functions (a new prototype would be required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
#define __KMP_GET_REDUCTION_METHOD(gtid) \
(__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
// description of the packed_reduction_method variable: look at the macros in
// kmp.h
// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit) {
// this lock was visible to a customer and to the threading profile tool as a
// serial overhead span (although it's used for an internal purpose only)
  // why was it visible in the previous implementation?
  // should we keep it visible in the new reduce block?
kmp_user_lock_p lck;
#if KMP_USE_DYNAMIC_LOCK
kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
// Check if it is initialized.
if (*lk == 0) {
if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
KMP_GET_D_TAG(__kmp_user_lock_seq));
} else {
__kmp_init_indirect_csptr(crit, loc, global_tid,
KMP_GET_I_TAG(__kmp_user_lock_seq));
}
}
// Branch for accessing the actual lock object and set operation. This
// branching is inevitable since this lock initialization does not follow the
// normal dispatch path (lock table is not used).
if (KMP_EXTRACT_D_TAG(lk) != 0) {
lck = (kmp_user_lock_p)lk;
KMP_DEBUG_ASSERT(lck != NULL);
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
}
KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
} else {
kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
lck = ilk->lock;
KMP_DEBUG_ASSERT(lck != NULL);
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
}
KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
}
#else // KMP_USE_DYNAMIC_LOCK
  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32-byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
lck = (kmp_user_lock_p)crit;
} else {
lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
}
KMP_DEBUG_ASSERT(lck != NULL);
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_critical, loc, lck);
__kmp_acquire_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
}
// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *crit) {
kmp_user_lock_p lck;
#if KMP_USE_DYNAMIC_LOCK
if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
lck = (kmp_user_lock_p)crit;
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
} else {
kmp_indirect_lock_t *ilk =
(kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
}
#else // KMP_USE_DYNAMIC_LOCK
  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32-byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
if (__kmp_base_user_lock_size > 32) {
lck = *((kmp_user_lock_p *)crit);
KMP_ASSERT(lck != NULL);
} else {
lck = (kmp_user_lock_p)crit;
}
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
__kmp_release_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
#if OMP_40_ENABLED
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
int *task_state) {
kmp_team_t *team;
  // Check whether we are inside a teams construct.
if (th->th.th_teams_microtask) {
*team_p = team = th->th.th_team;
if (team->t.t_level == th->th.th_teams_level) {
      // This is a reduction at the teams construct.
KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
// Let's swap teams temporarily for the reduction.
th->th.th_info.ds.ds_tid = team->t.t_master_tid;
th->th.th_team = team->t.t_parent;
th->th.th_team_nproc = th->th.th_team->t.t_nproc;
th->th.th_task_team = th->th.th_team->t.t_task_team[0];
*task_state = th->th.th_task_state;
th->th.th_task_state = 0;
return 1;
}
}
return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
// Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
th->th.th_info.ds.ds_tid = 0;
th->th.th_team = team;
th->th.th_team_nproc = team->t.t_nproc;
th->th.th_task_team = team->t.t_task_team[task_state];
th->th.th_task_state = task_state;
}
#endif
/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed
The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
size_t reduce_size, void *reduce_data,
void (*reduce_func)(void *lhs_data, void *rhs_data),
kmp_critical_name *lck) {
KMP_COUNT_BLOCK(REDUCE_nowait);
int retval = 0;
PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
kmp_info_t *th;
kmp_team_t *team;
int teams_swapped = 0, task_state;
#endif
KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  // why do we need this initialization here at all?
  // A reduction clause cannot be used as a stand-alone directive.
  // do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of a false-positive race by the thread checker?
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif
#if OMP_40_ENABLED
th = __kmp_thread_from_gtid(global_tid);
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED
// packed_reduction_method value will be reused by __kmp_end_reduce* function,
// the value should be kept in a variable
// the variable should be either a construct-specific or thread-specific
// property, not a team specific property
// (a thread can reach the next reduce block on the next construct, reduce
// method may differ on the next construct)
// an ident_t "loc" parameter could be used as a construct-specific property
// (what if loc == 0?)
// (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
// a thread-specific variable is better regarding two issues above (next
// construct and extra syncs)
// a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes the 'determine' and 'set' lines (no need to have one
  // thread execute them, which would only add unnecessary syncs)
packed_reduction_method = __kmp_determine_reduction_method(
loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
__KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
if (packed_reduction_method == critical_reduce_block) {
__kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
retval = 1;
} else if (packed_reduction_method == empty_reduce_block) {
    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
retval = 1;
} else if (packed_reduction_method == atomic_reduce_block) {
retval = 2;
// all threads should do this pop here (because __kmpc_end_reduce_nowait()
// won't be called by the code gen)
// (it's not quite good, because the checking block has been closed by
// this 'pop',
// but atomic operation has not been executed yet, will be executed
// slightly later, literally on next instruction)
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_reduce, loc);
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
tree_reduce_block)) {
// AT: performance issue: a real barrier here
// AT: (if master goes slow, other threads are blocked here waiting for the
// master to come and release them)
// AT: (it's not what a customer might expect specifying NOWAIT clause)
// AT: (specifying NOWAIT won't result in improvement of performance, it'll
// be confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other
// design) might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: do epcc test and compare times
// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if OMPT_SUPPORT
// JP: can this barrier potentially lead to task scheduling?
// JP: as long as there is a barrier in the implementation, OMPT should and
// will provide the barrier events, so we set up the necessary frame/return
// addresses.
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
retval =
__kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
global_tid, FALSE, reduce_size, reduce_data, reduce_func);
retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
    // all other workers except master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
if (__kmp_env_consistency_check) {
if (retval == 0) {
__kmp_pop_sync(global_tid, ct_reduce, loc);
}
}
} else {
// should never reach this block
KMP_ASSERT(0); // "unexpected method"
}
#if OMP_40_ENABLED
if (teams_swapped) {
__kmp_restore_swapped_teams(th, team, task_state);
}
#endif
KA_TRACE(
10,
("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
global_tid, packed_reduction_method, retval));
return retval;
}
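/* A sketch (simplified; not literal compiler output) of how generated code
   uses the return value, e.g. for
       #pragma omp for reduction(+ : sum) nowait
   loc, gtid and crit_name stand for the usual arguments, local is this
   thread's partial result, and reduce_fn is a hypothetical helper that adds
   rhs into lhs:
   @code
   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(local), &local,
                                reduce_fn, &crit_name)) {
   case 1: // this thread combines directly (critical or tree path)
     sum += local;
     __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
     break;
   case 2: // every thread combines its partial result atomically
#pragma omp atomic
     sum += local;
     break;
   default: // 0: nothing to do; the reduction already consumed our data
     break;
   }
   @endcode */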
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure
Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *lck) {
PACKED_REDUCTION_METHOD_T packed_reduction_method;
KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
if (packed_reduction_method == critical_reduce_block) {
__kmp_end_critical_section_reduce_block(loc, global_tid, lck);
} else if (packed_reduction_method == empty_reduce_block) {
    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)
} else if (packed_reduction_method == atomic_reduce_block) {
// neither master nor other workers should get here
// (code gen does not generate this call in case 2: atomic reduce block)
    // actually it would be better to remove this else-if entirely; after
    // removal this value would be checked by the 'else' branch and would assert
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
tree_reduce_block)) {
// only master gets here
} else {
// should never reach this block
KMP_ASSERT(0); // "unexpected method"
}
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_reduce, loc);
KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
global_tid, packed_reduction_method));
return;
}
/* 2.a.ii. Reduce Block with a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed
A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
size_t reduce_size, void *reduce_data,
void (*reduce_func)(void *lhs_data, void *rhs_data),
kmp_critical_name *lck) {
KMP_COUNT_BLOCK(REDUCE_wait);
int retval = 0;
PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
kmp_info_t *th;
kmp_team_t *team;
int teams_swapped = 0, task_state;
#endif
KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  // why do we need this initialization here at all?
  // A reduction clause cannot be used as a stand-alone directive.
  // do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of a false-positive race by the thread checker?
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
if (__kmp_env_consistency_check)
__kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif
#if OMP_40_ENABLED
th = __kmp_thread_from_gtid(global_tid);
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED
packed_reduction_method = __kmp_determine_reduction_method(
loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
__KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
if (packed_reduction_method == critical_reduce_block) {
__kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
retval = 1;
} else if (packed_reduction_method == empty_reduce_block) {
    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
retval = 1;
} else if (packed_reduction_method == atomic_reduce_block) {
retval = 2;
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
tree_reduce_block)) {
// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident =
loc; // needed for correct notification of frames
#endif
retval =
__kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
global_tid, TRUE, reduce_size, reduce_data, reduce_func);
retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
    // all other workers except master should do this pop here
    // (none of the workers except master will enter __kmpc_end_reduce())
if (__kmp_env_consistency_check) {
if (retval == 0) { // 0: all other workers; 1: master
__kmp_pop_sync(global_tid, ct_reduce, loc);
}
}
} else {
// should never reach this block
KMP_ASSERT(0); // "unexpected method"
}
#if OMP_40_ENABLED
if (teams_swapped) {
__kmp_restore_swapped_teams(th, team, task_state);
}
#endif
KA_TRACE(10,
("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
global_tid, packed_reduction_method, retval));
return retval;
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure
Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
kmp_critical_name *lck) {
PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
kmp_info_t *th;
kmp_team_t *team;
int teams_swapped = 0, task_state;
#endif
KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
#if OMP_40_ENABLED
th = __kmp_thread_from_gtid(global_tid);
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED
packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
if (packed_reduction_method == critical_reduce_block) {
__kmp_end_critical_section_reduce_block(loc, global_tid, lck);
// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
} else if (packed_reduction_method == empty_reduce_block) {
    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
} else if (packed_reduction_method == atomic_reduce_block) {
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
#endif
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
ompt_frame->enter_frame = ompt_data_none;
}
#endif
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
tree_reduce_block)) {
// only master executes here (master releases all other workers)
__kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
global_tid);
} else {
// should never reach this block
KMP_ASSERT(0); // "unexpected method"
}
#if OMP_40_ENABLED
if (teams_swapped) {
__kmp_restore_swapped_teams(th, team, task_state);
}
#endif
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_reduce, loc);
KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
global_tid, packed_reduction_method));
return;
}
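/* Illustrative sketch, not part of the runtime: compiler-generated code for
   "#pragma omp parallel for reduction(+ : sum)" might pair the two entry
   points roughly as follows, assuming a 32-bit int reduction variable
   (sum_local, sum_shared, reduce_func and lck are hypothetical outlined
   names):

     switch (__kmpc_reduce(loc, gtid, 1, sizeof(sum_local), &sum_local,
                           reduce_func, &lck)) {
     case 1: // this thread combines the partial results itself
       sum_shared += sum_local;
       __kmpc_end_reduce(loc, gtid, &lck);
       break;
     case 2: // atomic path: combine atomically, then finish
       __kmpc_atomic_fixed4_add(loc, gtid, &sum_shared, sum_local);
       __kmpc_end_reduce(loc, gtid, &lck);
       break;
     default: // 0: nothing to do for this thread
       break;
     }
*/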
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
/* end of interface to fast scalable reduce routines */
kmp_uint64 __kmpc_get_taskid() {
kmp_int32 gtid;
kmp_info_t *thread;
gtid = __kmp_get_gtid();
if (gtid < 0) {
return 0;
}
thread = __kmp_thread_from_gtid(gtid);
return thread->th.th_current_task->td_task_id;
} // __kmpc_get_taskid
kmp_uint64 __kmpc_get_parent_taskid() {
kmp_int32 gtid;
kmp_info_t *thread;
kmp_taskdata_t *parent_task;
gtid = __kmp_get_gtid();
if (gtid < 0) {
return 0;
}
thread = __kmp_thread_from_gtid(gtid);
parent_task = thread->th.th_current_task->td_parent;
return (parent_task == NULL ? 0 : parent_task->td_task_id);
} // __kmpc_get_parent_taskid
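/* Illustrative sketch, not part of the runtime: a tracing layer could use
   these two exported helpers to correlate a task with its parent, e.g.

     kmp_uint64 id = __kmpc_get_taskid();
     kmp_uint64 pid = __kmpc_get_parent_taskid();
     if (id != 0)
       printf("task %llu (parent %llu)\n", (unsigned long long)id,
              (unsigned long long)pid);

   Both helpers return 0 when called from a thread unknown to the runtime. */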
#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.
Initialize doacross loop information.
Expect the compiler to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
const struct kmp_dim *dims) {
int j, idx;
kmp_int64 last, trace_count;
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
kmp_uint32 *flags;
kmp_disp_t *pr_buf = th->th.th_dispatch;
dispatch_shared_info_t *sh_buf;
KA_TRACE(
20,
("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
gtid, num_dims, !team->t.t_serialized));
KMP_DEBUG_ASSERT(dims != NULL);
KMP_DEBUG_ASSERT(num_dims > 0);
if (team->t.t_serialized) {
KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
return; // no dependencies if team is serialized
}
KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
// the next loop
sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
// Save bounds info into allocated private buffer
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
th, sizeof(kmp_int64) * (4 * num_dims + 1));
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
pr_buf->th_doacross_info[0] =
(kmp_int64)num_dims; // first element is number of dimensions
// Also save the address of num_done so it can be accessed later without
// knowing the buffer index
pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
pr_buf->th_doacross_info[2] = dims[0].lo;
pr_buf->th_doacross_info[3] = dims[0].up;
pr_buf->th_doacross_info[4] = dims[0].st;
last = 5;
for (j = 1; j < num_dims; ++j) {
kmp_int64 range_length; // keeps the range of each dimension after dims[0]
if (dims[j].st == 1) { // most common case
// AC: should we care about ranges bigger than LLONG_MAX? (not for now)
range_length = dims[j].up - dims[j].lo + 1;
} else {
if (dims[j].st > 0) {
KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
} else { // negative increment
KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
range_length =
(kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
}
}
pr_buf->th_doacross_info[last++] = range_length;
pr_buf->th_doacross_info[last++] = dims[j].lo;
pr_buf->th_doacross_info[last++] = dims[j].up;
pr_buf->th_doacross_info[last++] = dims[j].st;
}
// Compute total trip count.
// Start with range of dims[0] which we don't need to keep in the buffer.
if (dims[0].st == 1) { // most common case
trace_count = dims[0].up - dims[0].lo + 1;
} else if (dims[0].st > 0) {
KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
} else { // negative increment
KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
}
for (j = 1; j < num_dims; ++j) {
trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
}
KMP_DEBUG_ASSERT(trace_count > 0);
// Check whether the shared buffer is still occupied by a previous loop
// (the one with buffer index idx - __kmp_dispatch_num_buffers)
if (idx != sh_buf->doacross_buf_idx) {
// Shared buffer is occupied, wait for it to be freed
__kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
__kmp_eq_4, NULL);
}
#if KMP_32_BIT_ARCH
// Check if we are the first thread. After the CAS the first thread gets 0,
// others get 1 if initialization is in progress, allocated pointer otherwise.
// Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
(volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
(volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
if (flags == NULL) {
// we are the first thread, allocate the array of flags
size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
KMP_MB();
sh_buf->doacross_flags = flags;
} else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
// initialization is still in progress, need to wait
while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
KMP_YIELD(TRUE);
KMP_MB();
} else {
KMP_MB();
}
KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
pr_buf->th_doacross_flags =
sh_buf->doacross_flags; // save private copy in order to not
// touch shared buffer on each iteration
KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
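/* Illustrative sketch, not part of the runtime: for a one-dimensional
   "#pragma omp for ordered(1)" loop with cross-iteration dependences, a
   compiler could drive the doacross entry points along these lines (the
   chunk bounds lb/ub and the loop body are placeholders):

     struct kmp_dim dim = {2, 8, 2}; // for (i = 2; i < 9; i += 2)
     __kmpc_doacross_init(loc, gtid, 1, &dim);
     for (kmp_int64 i = lb; i <= ub; i += 2) {
       kmp_int64 dep = i - 2; // ordered depend(sink: i - 2)
       __kmpc_doacross_wait(loc, gtid, &dep);
       ... loop body ...
       __kmpc_doacross_post(loc, gtid, &i); // ordered depend(source)
     }
     __kmpc_doacross_fini(loc, gtid);
*/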
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
kmp_int32 shft, num_dims, i;
kmp_uint32 flag;
kmp_int64 iter_number; // iteration number of "collapsed" loop nest
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
kmp_disp_t *pr_buf;
kmp_int64 lo, up, st;
KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
if (team->t.t_serialized) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
return; // no dependencies if team is serialized
}
// calculate sequential iteration number and check out-of-bounds condition
pr_buf = th->th.th_dispatch;
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
num_dims = pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
up = pr_buf->th_doacross_info[3];
st = pr_buf->th_doacross_info[4];
if (st == 1) { // most common case
if (vec[0] < lo || vec[0] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[0], lo, up));
return;
}
iter_number = vec[0] - lo;
} else if (st > 0) {
if (vec[0] < lo || vec[0] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[0], lo, up));
return;
}
iter_number = (kmp_uint64)(vec[0] - lo) / st;
} else { // negative increment
if (vec[0] > lo || vec[0] < up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[0], lo, up));
return;
}
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
kmp_int32 j = i * 4;
ln = pr_buf->th_doacross_info[j + 1];
lo = pr_buf->th_doacross_info[j + 2];
up = pr_buf->th_doacross_info[j + 3];
st = pr_buf->th_doacross_info[j + 4];
if (st == 1) {
if (vec[i] < lo || vec[i] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[i], lo, up));
return;
}
iter = vec[i] - lo;
} else if (st > 0) {
if (vec[i] < lo || vec[i] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[i], lo, up));
return;
}
iter = (kmp_uint64)(vec[i] - lo) / st;
} else { // st < 0
if (vec[i] > lo || vec[i] < up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n",
gtid, vec[i], lo, up));
return;
}
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
}
shft = iter_number % 32; // use 32-bit granularity
iter_number >>= 5; // divided by 32
flag = 1 << shft;
while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
KMP_YIELD(TRUE);
}
KMP_MB();
KA_TRACE(20,
("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
gtid, (iter_number << 5) + shft));
}
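/* Worked example of the indexing above: for linearized iteration number 70,
   shft = 70 % 32 = 6 and the word index becomes 70 >> 5 = 2, so the loop
   spins until bit 6 of th_doacross_flags[2] has been set by the matching
   __kmpc_doacross_post(). */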
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
kmp_int32 shft, num_dims, i;
kmp_uint32 flag;
kmp_int64 iter_number; // iteration number of "collapsed" loop nest
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
kmp_disp_t *pr_buf;
kmp_int64 lo, st;
KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
if (team->t.t_serialized) {
KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
return; // no dependencies if team is serialized
}
// calculate sequential iteration number (same as in "wait" but no
// out-of-bounds checks)
pr_buf = th->th.th_dispatch;
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
num_dims = pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
st = pr_buf->th_doacross_info[4];
if (st == 1) { // most common case
iter_number = vec[0] - lo;
} else if (st > 0) {
iter_number = (kmp_uint64)(vec[0] - lo) / st;
} else { // negative increment
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
kmp_int32 j = i * 4;
ln = pr_buf->th_doacross_info[j + 1];
lo = pr_buf->th_doacross_info[j + 2];
st = pr_buf->th_doacross_info[j + 4];
if (st == 1) {
iter = vec[i] - lo;
} else if (st > 0) {
iter = (kmp_uint64)(vec[i] - lo) / st;
} else { // st < 0
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
}
shft = iter_number % 32; // use 32-bit granularity
iter_number >>= 5; // divided by 32
flag = 1 << shft;
KMP_MB();
if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
(iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
kmp_int32 num_done;
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
kmp_disp_t *pr_buf = th->th.th_dispatch;
KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
if (team->t.t_serialized) {
KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
return; // nothing to do
}
num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
if (num_done == th->th.th_team_nproc) {
// we are the last thread, need to free shared resources
int idx = pr_buf->th_doacross_buf_idx - 1;
dispatch_shared_info_t *sh_buf =
&team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
(kmp_int64)&sh_buf->doacross_num_done);
KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
__kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
sh_buf->doacross_flags = NULL;
sh_buf->doacross_num_done = 0;
sh_buf->doacross_buf_idx +=
__kmp_dispatch_num_buffers; // free buffer for future re-use
}
// free private resources (need to keep buffer index forever)
pr_buf->th_doacross_flags = NULL;
__kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
pr_buf->th_doacross_info = NULL;
KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
#endif
#if OMP_50_ENABLED
int __kmpc_get_target_offload(void) {
if (!__kmp_init_serial) {
__kmp_serial_initialize();
}
return __kmp_target_offload;
}
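/* Note: the returned value is the policy captured during serial
   initialization (parsed from the OMP_TARGET_OFFLOAD environment variable);
   the lazy __kmp_serial_initialize() call above makes it safe to query
   before any other runtime entry point. */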
#endif // OMP_50_ENABLED
// end of file //
Index: head/contrib/openmp/runtime/src/ompt-specific.cpp
===================================================================
--- head/contrib/openmp/runtime/src/ompt-specific.cpp (revision 349792)
+++ head/contrib/openmp/runtime/src/ompt-specific.cpp (revision 349793)
@@ -1,451 +1,451 @@
/*
* ompt-specific.cpp -- OMPT internal functions
*/
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
//******************************************************************************
// include files
//******************************************************************************
#include "kmp.h"
#include "ompt-specific.h"
#if KMP_OS_UNIX
#include <dlfcn.h>
#endif
#if KMP_OS_WINDOWS
#define THREAD_LOCAL __declspec(thread)
#else
#define THREAD_LOCAL __thread
#endif
#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE
//******************************************************************************
// macros
//******************************************************************************
#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info
#define OMPT_THREAD_ID_BITS 16
//******************************************************************************
// private operations
//******************************************************************************
//----------------------------------------------------------
// traverse the team and task hierarchy
// note: __ompt_get_teaminfo and __ompt_get_task_info_object
// traverse the hierarchy similarly and need to be
// kept consistent
//----------------------------------------------------------
ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
kmp_info_t *thr = ompt_get_thread();
if (thr) {
kmp_team *team = thr->th.th_team;
if (team == NULL)
return NULL;
ompt_lw_taskteam_t *next_lwt = LWT_FROM_TEAM(team), *lwt = NULL;
while (depth > 0) {
// next lightweight team (if any)
if (lwt)
lwt = lwt->parent;
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && team) {
if (next_lwt) {
lwt = next_lwt;
next_lwt = NULL;
} else {
team = team->t.t_parent;
if (team) {
next_lwt = LWT_FROM_TEAM(team);
}
}
}
depth--;
}
if (lwt) {
// lightweight teams have one task
if (size)
*size = 1;
// return team info for lightweight team
return &lwt->ompt_team_info;
} else if (team) {
// extract size from heavyweight team
if (size)
*size = team->t.t_nproc;
// return team info for heavyweight team
return &team->t.ompt_team_info;
}
}
return NULL;
}
ompt_task_info_t *__ompt_get_task_info_object(int depth) {
ompt_task_info_t *info = NULL;
kmp_info_t *thr = ompt_get_thread();
if (thr) {
kmp_taskdata_t *taskdata = thr->th.th_current_task;
ompt_lw_taskteam_t *lwt = NULL,
*next_lwt = LWT_FROM_TEAM(taskdata->td_team);
while (depth > 0) {
// next lightweight team (if any)
if (lwt)
lwt = lwt->parent;
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && taskdata) {
if (next_lwt) {
lwt = next_lwt;
next_lwt = NULL;
} else {
taskdata = taskdata->td_parent;
if (taskdata) {
next_lwt = LWT_FROM_TEAM(taskdata->td_team);
}
}
}
depth--;
}
if (lwt) {
info = &lwt->ompt_task_info;
} else if (taskdata) {
info = &taskdata->ompt_task_info;
}
}
return info;
}
ompt_task_info_t *__ompt_get_scheduling_taskinfo(int depth) {
ompt_task_info_t *info = NULL;
kmp_info_t *thr = ompt_get_thread();
if (thr) {
kmp_taskdata_t *taskdata = thr->th.th_current_task;
ompt_lw_taskteam_t *lwt = NULL,
*next_lwt = LWT_FROM_TEAM(taskdata->td_team);
while (depth > 0) {
// next lightweight team (if any)
if (lwt)
lwt = lwt->parent;
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && taskdata) {
// first try scheduling parent (for explicit task scheduling)
if (taskdata->ompt_task_info.scheduling_parent) {
taskdata = taskdata->ompt_task_info.scheduling_parent;
} else if (next_lwt) {
lwt = next_lwt;
next_lwt = NULL;
} else {
// then go for implicit tasks
taskdata = taskdata->td_parent;
if (taskdata) {
next_lwt = LWT_FROM_TEAM(taskdata->td_team);
}
}
}
depth--;
}
if (lwt) {
info = &lwt->ompt_task_info;
} else if (taskdata) {
info = &taskdata->ompt_task_info;
}
}
return info;
}
//******************************************************************************
// interface operations
//******************************************************************************
//----------------------------------------------------------
// thread support
//----------------------------------------------------------
ompt_data_t *__ompt_get_thread_data_internal() {
if (__kmp_get_gtid() >= 0) {
kmp_info_t *thread = ompt_get_thread();
if (thread == NULL)
return NULL;
return &(thread->th.ompt_thread_info.thread_data);
}
return NULL;
}
//----------------------------------------------------------
// state support
//----------------------------------------------------------
void __ompt_thread_assign_wait_id(void *variable) {
kmp_info_t *ti = ompt_get_thread();
- ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable;
+ ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)(uintptr_t)variable;
}
int __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) {
kmp_info_t *ti = ompt_get_thread();
if (ti) {
if (omp_wait_id)
*omp_wait_id = ti->th.ompt_thread_info.wait_id;
return ti->th.ompt_thread_info.state;
}
return ompt_state_undefined;
}
//----------------------------------------------------------
// parallel region support
//----------------------------------------------------------
int __ompt_get_parallel_info_internal(int ancestor_level,
ompt_data_t **parallel_data,
int *team_size) {
if (__kmp_get_gtid() >= 0) {
ompt_team_info_t *info;
if (team_size) {
info = __ompt_get_teaminfo(ancestor_level, team_size);
} else {
info = __ompt_get_teaminfo(ancestor_level, NULL);
}
if (parallel_data) {
*parallel_data = info ? &(info->parallel_data) : NULL;
}
return info ? 2 : 0;
} else {
return 0;
}
}
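/* Illustrative sketch, not part of the runtime: an OMPT tool reaches this
   code through the ompt_get_parallel_info entry point obtained via
   lookup(). Walking the enclosing parallel regions might look like this
   (get_parallel_info is the looked-up pointer, a hypothetical local name):

     ompt_data_t *pdata;
     int tsize;
     for (int lvl = 0; get_parallel_info(lvl, &pdata, &tsize) == 2; ++lvl)
       printf("ancestor %d: team size %d\n", lvl, tsize);

   A return value of 2 means the information is available; 0 means the
   requested ancestor level does not exist. */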
//----------------------------------------------------------
// lightweight task team support
//----------------------------------------------------------
void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
ompt_data_t *ompt_pid, void *codeptr) {
// initialize parallel_data with input, return address to parallel_data on
// exit
lwt->ompt_team_info.parallel_data = *ompt_pid;
lwt->ompt_team_info.master_return_address = codeptr;
lwt->ompt_task_info.task_data.value = 0;
lwt->ompt_task_info.frame.enter_frame = ompt_data_none;
lwt->ompt_task_info.frame.exit_frame = ompt_data_none;
lwt->ompt_task_info.scheduling_parent = NULL;
lwt->ompt_task_info.deps = NULL;
lwt->ompt_task_info.ndeps = 0;
lwt->heap = 0;
lwt->parent = 0;
}
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
int on_heap) {
ompt_lw_taskteam_t *link_lwt = lwt;
if (thr->th.th_team->t.t_serialized >
1) { // we already have a team, so link the new team and swap values
if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap
link_lwt =
(ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
}
link_lwt->heap = on_heap;
// in the (on_stack) case this copy sequence amounts to a swap
ompt_team_info_t tmp_team = lwt->ompt_team_info;
link_lwt->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
*OMPT_CUR_TEAM_INFO(thr) = tmp_team;
ompt_task_info_t tmp_task = lwt->ompt_task_info;
link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
*OMPT_CUR_TASK_INFO(thr) = tmp_task;
// link the taskteam into the list of taskteams:
ompt_lw_taskteam_t *my_parent =
thr->th.th_team->t.ompt_serialized_team_info;
link_lwt->parent = my_parent;
thr->th.th_team->t.ompt_serialized_team_info = link_lwt;
} else {
// this is the first serialized team, so we just store the values in the
// team and drop the taskteam-object
*OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info;
*OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info;
}
}
void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
if (lwtask) {
thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
ompt_team_info_t tmp_team = lwtask->ompt_team_info;
lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
*OMPT_CUR_TEAM_INFO(thr) = tmp_team;
ompt_task_info_t tmp_task = lwtask->ompt_task_info;
lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
*OMPT_CUR_TASK_INFO(thr) = tmp_task;
if (lwtask->heap) {
__kmp_free(lwtask);
lwtask = NULL;
}
}
// return lwtask;
}
//----------------------------------------------------------
// task support
//----------------------------------------------------------
int __ompt_get_task_info_internal(int ancestor_level, int *type,
ompt_data_t **task_data,
ompt_frame_t **task_frame,
ompt_data_t **parallel_data,
int *thread_num) {
if (__kmp_get_gtid() < 0)
return 0;
if (ancestor_level < 0)
return 0;
// copied from __ompt_get_scheduling_taskinfo
ompt_task_info_t *info = NULL;
ompt_team_info_t *team_info = NULL;
kmp_info_t *thr = ompt_get_thread();
int level = ancestor_level;
if (thr) {
kmp_taskdata_t *taskdata = thr->th.th_current_task;
if (taskdata == NULL)
return 0;
kmp_team *team = thr->th.th_team, *prev_team = NULL;
if (team == NULL)
return 0;
ompt_lw_taskteam_t *lwt = NULL,
*next_lwt = LWT_FROM_TEAM(taskdata->td_team),
*prev_lwt = NULL;
while (ancestor_level > 0) {
// needed for thread_num
prev_team = team;
prev_lwt = lwt;
// next lightweight team (if any)
if (lwt)
lwt = lwt->parent;
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && taskdata) {
// first try scheduling parent (for explicit task scheduling)
if (taskdata->ompt_task_info.scheduling_parent) {
taskdata = taskdata->ompt_task_info.scheduling_parent;
} else if (next_lwt) {
lwt = next_lwt;
next_lwt = NULL;
} else {
// then go for implicit tasks
taskdata = taskdata->td_parent;
if (team == NULL)
return 0;
team = team->t.t_parent;
if (taskdata) {
next_lwt = LWT_FROM_TEAM(taskdata->td_team);
}
}
}
ancestor_level--;
}
if (lwt) {
info = &lwt->ompt_task_info;
team_info = &lwt->ompt_team_info;
if (type) {
*type = ompt_task_implicit;
}
} else if (taskdata) {
info = &taskdata->ompt_task_info;
team_info = &team->t.ompt_team_info;
if (type) {
if (taskdata->td_parent) {
*type = (taskdata->td_flags.tasktype ? ompt_task_explicit
: ompt_task_implicit) |
TASK_TYPE_DETAILS_FORMAT(taskdata);
} else {
*type = ompt_task_initial;
}
}
}
if (task_data) {
*task_data = info ? &info->task_data : NULL;
}
if (task_frame) {
// OpenMP spec asks for the scheduling task to be returned.
*task_frame = info ? &info->frame : NULL;
}
if (parallel_data) {
*parallel_data = team_info ? &(team_info->parallel_data) : NULL;
}
if (thread_num) {
if (level == 0)
*thread_num = __kmp_get_tid();
else if (prev_lwt)
*thread_num = 0;
else
*thread_num = prev_team->t.t_master_tid;
// *thread_num = team->t.t_master_tid;
}
return info ? 2 : 0;
}
return 0;
}
//----------------------------------------------------------
// team support
//----------------------------------------------------------
void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid) {
team->t.ompt_team_info.parallel_data = ompt_pid;
}
//----------------------------------------------------------
// misc
//----------------------------------------------------------
static uint64_t __ompt_get_unique_id_internal() {
static uint64_t thread = 1;
static THREAD_LOCAL uint64_t ID = 0;
if (ID == 0) {
uint64_t new_thread = KMP_TEST_THEN_INC64((kmp_int64 *)&thread);
ID = new_thread << (sizeof(uint64_t) * 8 - OMPT_THREAD_ID_BITS);
}
return ++ID;
}
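/* The generator above partitions the 64-bit ID space: the top
   OMPT_THREAD_ID_BITS (16) bits hold a per-thread index taken once from the
   shared counter, and the low 48 bits count locally. For example, the
   second thread ever to request an ID observes new_thread == 2 and then
   hands out (2ULL << 48) + 1, (2ULL << 48) + 2, ... without touching shared
   state again. */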
Index: head/contrib/openmp
===================================================================
--- head/contrib/openmp (revision 349792)
+++ head/contrib/openmp (revision 349793)
Property changes on: head/contrib/openmp
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/llvm-openmp/dist-release_80:r348963-349790
Index: head/lib/clang/include/clang/Basic/Version.inc
===================================================================
--- head/lib/clang/include/clang/Basic/Version.inc (revision 349792)
+++ head/lib/clang/include/clang/Basic/Version.inc (revision 349793)
@@ -1,11 +1,11 @@
/* $FreeBSD$ */
#define CLANG_VERSION 8.0.1
#define CLANG_VERSION_STRING "8.0.1"
#define CLANG_VERSION_MAJOR 8
#define CLANG_VERSION_MINOR 0
#define CLANG_VERSION_PATCHLEVEL 1
#define CLANG_VENDOR "FreeBSD "
-#define SVN_REVISION "363030"
+#define SVN_REVISION "364487"
Index: head/lib/clang/include/lld/Common/Version.inc
===================================================================
--- head/lib/clang/include/lld/Common/Version.inc (revision 349792)
+++ head/lib/clang/include/lld/Common/Version.inc (revision 349793)
@@ -1,10 +1,10 @@
// $FreeBSD$
#define LLD_VERSION 8.0.1
#define LLD_VERSION_STRING "8.0.1"
#define LLD_VERSION_MAJOR 8
#define LLD_VERSION_MINOR 0
#define LLD_REPOSITORY_STRING "FreeBSD"
// <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
-#define LLD_REVISION_STRING "363030-1300004"
+#define LLD_REVISION_STRING "364487-1300004"
Index: head/lib/clang/include/llvm/Support/VCSRevision.h
===================================================================
--- head/lib/clang/include/llvm/Support/VCSRevision.h (revision 349792)
+++ head/lib/clang/include/llvm/Support/VCSRevision.h (revision 349793)
@@ -1,2 +1,2 @@
/* $FreeBSD$ */
-#define LLVM_REVISION "svn-r363030"
+#define LLVM_REVISION "svn-r364487"
